{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Export the LLM endpoint (DashScope compatible-mode URL) and API key as environment variables.\n",
    "# The key below is a placeholder: substitute your own Qwen API key and never commit a real one.\n",
    "%env LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1\n",
    "%env LLM_API_KEY=sk-替换为自己的Qwen API_KEY"
   ],
   "id": "e35cfbac5da18b14"
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "af375836-b870-458b-87d1-4e00565977eb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:20.742839Z",
     "iopub.status.busy": "2024-12-04T15:04:20.742632Z",
     "iopub.status.idle": "2024-12-04T15:04:20.755500Z",
     "shell.execute_reply": "2024-12-04T15:04:20.754994Z",
     "shell.execute_reply.started": "2024-12-04T15:04:20.742819Z"
    },
    "papermill": {
     "duration": 0.115454,
     "end_time": "2024-11-23T14:29:00.919641",
     "exception": false,
     "start_time": "2024-11-23T14:29:00.804187",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%capture --no-stderr\n",
    "# shutil is part of the Python standard library, so it must not be passed to pip.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "# openpyxl is the engine pandas uses to read the .xlsx question/answer file.\n",
    "%pip install -U langchain langchain_community langchain_openai pypdf sentence_transformers chromadb openpyxl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1e2c72b8-ee12-4130-af88-699998aa230c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:20.756112Z",
     "iopub.status.busy": "2024-12-04T15:04:20.755973Z",
     "iopub.status.idle": "2024-12-04T15:04:20.963038Z",
     "shell.execute_reply": "2024-12-04T15:04:20.962566Z",
     "shell.execute_reply.started": "2024-12-04T15:04:20.756098Z"
    },
    "papermill": {
     "duration": 0.319981,
     "end_time": "2024-11-23T14:29:01.380771",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.060790",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "841d2b02-ad06-40d2-b11f-c7adccec6ca2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:20.964365Z",
     "iopub.status.busy": "2024-12-04T15:04:20.963645Z",
     "iopub.status.idle": "2024-12-04T15:04:20.967414Z",
     "shell.execute_reply": "2024-12-04T15:04:20.966834Z",
     "shell.execute_reply.started": "2024-12-04T15:04:20.964351Z"
    },
    "papermill": {
     "duration": 0.121409,
     "end_time": "2024-11-23T14:29:01.638126",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.516717",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Name of this experiment; used below as the sub-directory that holds its artifacts.\n",
    "expr_version = 'split_01_5_markdown_header_text_split_v4'\n",
    "\n",
    "# Both directories are resolved relative to the parent of the current working directory.\n",
    "preprocess_output_dir = os.path.join(os.path.pardir, 'outputs', 'v1_20240713')\n",
    "expr_dir = os.path.join(os.path.pardir, 'experiments', expr_version)\n",
    "\n",
    "# Only the experiment directory is created here; preprocess_output_dir is expected to exist already.\n",
    "os.makedirs(expr_dir, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf7e81e3-4c82-4842-aef5-7592caaf1d39",
   "metadata": {
    "papermill": {
     "duration": 0.100379,
     "end_time": "2024-11-23T14:29:01.862379",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.762000",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 读取文档"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e6920e29-bc7d-4635-be06-d151eaf0e100",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:20.968128Z",
     "iopub.status.busy": "2024-12-04T15:04:20.968009Z",
     "iopub.status.idle": "2024-12-04T15:04:22.798220Z",
     "shell.execute_reply": "2024-12-04T15:04:22.797724Z",
     "shell.execute_reply.started": "2024-12-04T15:04:20.968117Z"
    },
    "papermill": {
     "duration": 2.012298,
     "end_time": "2024-11-23T14:29:03.974974",
     "exception": false,
     "start_time": "2024-11-23T14:29:01.962676",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "\n",
    "# The report as a PDF, loaded through PyPDFLoader.\n",
    "loader = PyPDFLoader(os.path.join(os.path.pardir, 'data', '2024全球经济金融展望报告.pdf'))\n",
    "pdf_documents = loader.load()\n",
    "\n",
    "# The same report as a single Markdown string (presumably produced by MinerU, judging by the directory name).\n",
    "# The file is opened in a with-block so the handle is closed, and decoded explicitly as UTF-8 so the\n",
    "# Chinese text does not depend on the platform's default encoding (assumes the file is UTF-8 -- TODO confirm).\n",
    "markdown_path = os.path.join(os.path.pardir, 'outputs', 'MinerU_parsed_20241204', '2024全球经济金融展望报告.md')\n",
    "with open(markdown_path, encoding='utf-8') as markdown_file:\n",
    "    markdown_documents = markdown_file.read()\n",
    "\n",
    "# Question/answer table written to preprocess_output_dir.\n",
    "qa_df = pd.read_excel(os.path.join(preprocess_output_dir, 'question_answer.xlsx'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "841ec659-4ad7-4e1f-b1ea-3477bf97fde3",
   "metadata": {
    "papermill": {
     "duration": 0.100297,
     "end_time": "2024-11-23T14:29:04.219302",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.119005",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 文档切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "74fe856a-7c19-4c3c-bb30-7abfa6298f74",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.798915Z",
     "iopub.status.busy": "2024-12-04T15:04:22.798691Z",
     "iopub.status.idle": "2024-12-04T15:04:22.806745Z",
     "shell.execute_reply": "2024-12-04T15:04:22.806411Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.798902Z"
    },
    "papermill": {
     "duration": 0.109229,
     "end_time": "2024-11-23T14:29:04.429069",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.319840",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "from langchain.text_splitter import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter\n",
    "from uuid import uuid4\n",
    "\n",
    "def split_pdf_docs(documents, filepath, chunk_size=400, chunk_overlap=40, seperators=None, force_split=False):\n",
    "    \"\"\"Split documents into UUID-tagged chunks, caching the result as a pickle file.\n",
    "\n",
    "    If ``filepath`` exists and ``force_split`` is false, the pickled chunks are returned\n",
    "    as-is; ``chunk_size``, ``chunk_overlap`` and ``seperators`` are then NOT applied.\n",
    "\n",
    "    Args:\n",
    "        documents: Documents handed to ``RecursiveCharacterTextSplitter.split_documents``.\n",
    "        filepath: Path of the pickle cache that is read and written.\n",
    "        chunk_size: ``chunk_size`` passed to the splitter.\n",
    "        chunk_overlap: ``chunk_overlap`` passed to the splitter.\n",
    "        seperators: Separators passed to the splitter (parameter spelling kept for\n",
    "            backward compatibility). ``None`` selects the default: three consecutive\n",
    "            newlines, then two consecutive newlines.\n",
    "        force_split: When true, ignore an existing cache file and split again.\n",
    "\n",
    "    Returns:\n",
    "        The list of chunks; each freshly split chunk gets a new ``metadata['uuid']``.\n",
    "    \"\"\"\n",
    "    if os.path.exists(filepath) and not force_split:\n",
    "        print('found cache, restoring...')\n",
    "        # NOTE: unpickling can execute arbitrary code; only load cache files you created yourself.\n",
    "        with open(filepath, 'rb') as cache_file:\n",
    "            return pickle.load(cache_file)\n",
    "\n",
    "    # Build the default list per call instead of sharing one mutable default object.\n",
    "    if seperators is None:\n",
    "        seperators = ['\\n\\n\\n', '\\n\\n']\n",
    "\n",
    "    splitter = RecursiveCharacterTextSplitter(\n",
    "        chunk_size=chunk_size,\n",
    "        chunk_overlap=chunk_overlap,\n",
    "        separators=seperators\n",
    "    )\n",
    "    split_docs = splitter.split_documents(documents)\n",
    "    for chunk in split_docs:\n",
    "        chunk.metadata['uuid'] = str(uuid4())\n",
    "\n",
    "    # Write through a with-block so the cache file is flushed and closed before returning.\n",
    "    with open(filepath, 'wb') as cache_file:\n",
    "        pickle.dump(split_docs, cache_file)\n",
    "\n",
    "    return split_docs\n",
    "\n",
    "def split_md_docs(markdown_document):\n",
    "    \"\"\"Split a Markdown string on its level 1-3 headers.\n",
    "\n",
    "    Header lines are kept in the chunk text (``strip_headers=False``) and are\n",
    "    registered under the metadata keys 'Header 1', 'Header 2' and 'Header 3'.\n",
    "\n",
    "    Args:\n",
    "        markdown_document: The Markdown text to split.\n",
    "\n",
    "    Returns:\n",
    "        Whatever ``MarkdownHeaderTextSplitter.split_text`` returns for the text.\n",
    "    \"\"\"\n",
    "    # ('#', 'Header 1'), ('##', 'Header 2'), ('###', 'Header 3')\n",
    "    header_levels = [('#' * depth, f'Header {depth}') for depth in (1, 2, 3)]\n",
    "    section_splitter = MarkdownHeaderTextSplitter(header_levels, strip_headers=False)\n",
    "    return section_splitter.split_text(markdown_document)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "aa25540d-0504-4ae7-9804-9e3862b132d5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.808202Z",
     "iopub.status.busy": "2024-12-04T15:04:22.808077Z",
     "iopub.status.idle": "2024-12-04T15:04:22.819851Z",
     "shell.execute_reply": "2024-12-04T15:04:22.819379Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.808190Z"
    },
    "papermill": {
     "duration": 0.145583,
     "end_time": "2024-11-23T14:29:04.677429",
     "exception": false,
     "start_time": "2024-11-23T14:29:04.531846",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "found cache, restoring...\n"
     ]
    }
   ],
   "source": [
    "# split_pdf_docs returns the pickled chunks when the cache file already exists;\n",
    "# in that case chunk_size / chunk_overlap given here are not applied.\n",
    "pdf_splitted_docs = split_pdf_docs(pdf_documents, os.path.join(preprocess_output_dir, 'split_docs.pkl'), chunk_size=500, chunk_overlap=50)\n",
    "md_splitted_docs = split_md_docs(markdown_documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "28d8135e-3fda-4c3b-9c69-059a2f014219",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.820375Z",
     "iopub.status.busy": "2024-12-04T15:04:22.820254Z",
     "iopub.status.idle": "2024-12-04T15:04:22.824816Z",
     "shell.execute_reply": "2024-12-04T15:04:22.824450Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.820363Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "52"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of chunks obtained from the PDF.\n",
    "len(pdf_splitted_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c015e2ab-c5f6-4621-ba2a-9c7f26d887ae",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.825421Z",
     "iopub.status.busy": "2024-12-04T15:04:22.825302Z",
     "iopub.status.idle": "2024-12-04T15:04:22.831062Z",
     "shell.execute_reply": "2024-12-04T15:04:22.830702Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.825410Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='研究院\\n全球经济金融展望报告\\n要点2024年年报（总第57期） 报告日期：2023年12月12日\\n●2023年全球经济增长动力持续回落，各国复苏分化，\\n发达经济体增速明显放缓，新兴经济体整体表现稳定。\\n全球贸易增长乏力，各国生产景气度逐渐回落，内需\\n对经济的拉动作用减弱。欧美央行货币政策紧缩态势\\n放缓，美元指数高位震荡后走弱，全球股市表现总体\\n好于预期，但区域分化明显。高利率环境抑制债券融\\n资需求，债券违约风险持续上升。\\n●展望2024年，预计全球经济复苏将依旧疲软，主要\\n经济体增长态势和货币政策走势将进一步分化。欧美\\n央行大概率结束本轮紧缩货币周期，美元指数将逐步\\n走弱，流向新兴经济体的跨境资本将增加。国际原油\\n市场短缺格局或延续，新能源发展成为重点。\\n●海湾六国经济发展与投资前景、高利率和高债务对\\n美国房地产市场脆弱性的影响等热点问题值得关注。中国银行研究院\\n全球经济金融研究课题组\\n组长：陈卫东\\n副组长：钟红\\n廖淑萍\\n成员：边卫红\\n熊启跃\\n王有鑫\\n曹鸿宇\\n李颖婷\\n王宁远\\n初晓\\n章凯莉\\n黄小军（纽约）\\n陆晓明（纽约）\\n黄承煜（纽约）\\n宋达志（伦敦）\\n李振龙（伦敦）\\n张传捷（伦敦）\\n刘冰彦（法兰克福）\\n温颍坤（法兰克福）\\n张明捷（法兰克福）\\n王哲（东京）\\n李彧（香港）\\n黎永康（香港）\\n联系人：王有鑫\\n电话：010-66594127\\n邮件：wangyouxin_hq@bank-of-china.com主要经济体GDP增速变化趋势（%）\\n资料来源：IMF，中国银行研究院', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 0, 'uuid': 'e73a0c9d-d42b-4350-a4c3-b38bf67c68a5'}),\n",
       " Document(page_content='全球经济金融展望报告\\n中国银行研究院 1 2024年\\n全球经济复苏疲软，货币政策取向分化\\n——中国银行全球经济金融展望报告（2024年）\\n2023年，全球经济增长动力持续回落。分区域看，各国复苏存在较大差异，\\n发达经济体增速明显放缓，新兴经济体增速与2022年大致持平。生产端，全球\\n供应链持续恢复，但生产景气度逐渐回落。需求端，内需对经济的拉动作用逐\\n渐减弱，各国国内投资和跨境投资均持续承压；全球货物贸易量指数和价格指\\n数下行，主要经济体出口贸易同比增速下降。欧美央行货币政策延续收紧态势，\\n但步伐整体放缓；金融体系短期资金运行发生结构性变化，“去存款化”特征\\n突出。美元指数高位震荡后走弱，全球股市表现总体好于预期，但区域分化显\\n著。高利率环境抑制债券融资需求，债券违约风险持续上升，美国政府债务可\\n持续性问题引发市场关注。展望2024年，预计全球经济复苏将依旧疲软，主要\\n经济体增长态势和货币政策将进一步分化。欧美央行大概率结束本轮加息周期，\\n日本央行可能退出负利率政策，跨境资本回流美国趋势将放缓，流向新兴经济\\n体的资金将增加。美元指数将逐步走弱，新兴经济体货币汇率有望回升。国际\\n原油市场短缺格局或延续，新能源发展成为重点。本期报告分别对海湾六国经\\n济发展与投资前景、高利率和高债务对美国房地产市场脆弱性的影响两个专题\\n展开分析。\\n一、全球经济回顾与展望\\n（一）全球经济将在波动分化中筑底复苏\\n2023年，全球经济增长动力持续回落，经济增速连续两年下降。受地缘政\\n治冲突、高通胀、货币政策紧缩等因素影响，全球经济下行压力加大。预计2023\\n年全球GDP增速为2.7%（市场汇率法），较2022年下降0.3个百分点。', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 2, 'uuid': '41d95288-441d-4c02-948a-6a3f0f4ef3ba'}),\n",
       " Document(page_content='全球经济金融展望报告\\n中国银行研究院 2 2024年\\n图1：全球GDP增速（%）\\n资料来源：IMF，中国银行研究院\\n分区域看，全球经济复苏不均衡，各国存在较大差异。发达经济体增速明\\n显放缓，预计2023年增速较2022年下降1个百分点。其中，欧元区和英国经\\n济增速大幅下降，美国表现好于其他发达经济体。2023年三季度，欧元区和英\\n国GDP环比增速均由之前的正增长转为负增长，分别下降0.1%和0.03%；美\\n国GDP环比增长折年率为4.9%，比二季度增速高2.8个百分点。新兴经济体增\\n速与2022年大致持平，预计2023年增速比2022年下降0.1个百分点。其中，\\n东南亚等出口型经济体增长承压，拉美、非洲等大宗商品出口国增速放缓，中\\n东欧国家经济增速加快（图2）。', metadata={'source': 'data/2024全球经济金融展望报告.pdf', 'page': 3, 'uuid': '1f406690-b478-43cd-96f8-cd77924e300e'})]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first three PDF chunks.\n",
    "pdf_splitted_docs[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "414feddc-648f-444b-9988-224e6e6b2fb1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.831652Z",
     "iopub.status.busy": "2024-12-04T15:04:22.831503Z",
     "iopub.status.idle": "2024-12-04T15:04:22.836604Z",
     "shell.execute_reply": "2024-12-04T15:04:22.836292Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.831641Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "47"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of sections produced by the Markdown header split.\n",
    "len(md_splitted_docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "543f6f4e-28c1-4238-ae99-9abab95c2318",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.837246Z",
     "iopub.status.busy": "2024-12-04T15:04:22.837125Z",
     "iopub.status.idle": "2024-12-04T15:04:22.851389Z",
     "shell.execute_reply": "2024-12-04T15:04:22.848766Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.837235Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'Header 1': '全球经济金融展望报告'}, page_content='# 全球经济金融展望报告  \\n2024年年报（总第57期）  \\n报告日期：2023年12月12日'),\n",
       " Document(metadata={'Header 1': '要点'}, page_content='# 要点  \\n●2023 年全球经济增长动力持续回落，各国复苏分化，发达经济体增速明显放缓，新兴经济体整体表现稳定。全球贸易增长乏力，各国生产景气度逐渐回落，内需对经济的拉动作用减弱。欧美央行货币政策紧缩态势放缓，美元指数高位震荡后走弱，全球股市表现总体好于预期，但区域分化明显。高利率环境抑制债券融资需求，债券违约风险持续上升。  \\n$\\\\bullet$ 展望2024 年，预计全球经济复苏将依旧疲软，主要经济体增长态势和货币政策走势将进一步分化。欧美央行大概率结束本轮紧缩货币周期，美元指数将逐步走弱，流向新兴经济体的跨境资本将增加。国际原油市场短缺格局或延续，新能源发展成为重点。  \\n$\\\\bullet$ 海湾六国经济发展与投资前景、高利率和高债务对美国房地产市场脆弱性的影响等热点问题值得关注。  \\n![](images/c7e6ce1606712e84e07a05bcf6016906efa3fc778e40fcd0e91ac4fcb5503b79.jpg)\\n主要经济体GDP 增速变化趋势（%）\\n资料来源：IMF，中国银行研究院'),\n",
       " Document(metadata={'Header 1': '中国银行研究院全球经济金融研究课题组'}, page_content='# 中国银行研究院全球经济金融研究课题组  \\n![](images/a5d0eb181c75231451c8f890ec50fe5822e2306a9beb543ca35a04880abbf639.jpg)  \\n联系人：王有鑫\\n电话：010-66594127\\n邮件： wangyouxin_hq@bank-of-china.com')]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first three Markdown sections.\n",
    "md_splitted_docs[:3]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4fb9cf39-1221-4b46-ab92-b300dc261c8e",
   "metadata": {},
   "source": [
    "## 检查一下切分后的块长度分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "c76b31aa-28af-430b-a62c-8879905176b7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.854442Z",
     "iopub.status.busy": "2024-12-04T15:04:22.853744Z",
     "iopub.status.idle": "2024-12-04T15:04:22.876989Z",
     "shell.execute_reply": "2024-12-04T15:04:22.875251Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.854374Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      52.000000\n",
       "mean      623.307692\n",
       "std       258.763920\n",
       "min        65.000000\n",
       "25%       476.750000\n",
       "50%       618.000000\n",
       "75%       801.250000\n",
       "max      1306.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of chunk length (in characters) for the PDF chunks.\n",
    "pd.Series([len(d.page_content) for d in pdf_splitted_docs]).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "91e17fe4-4ef8-4768-932e-ed9cfb76eef6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.879298Z",
     "iopub.status.busy": "2024-12-04T15:04:22.878774Z",
     "iopub.status.idle": "2024-12-04T15:04:22.889363Z",
     "shell.execute_reply": "2024-12-04T15:04:22.888501Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.879250Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      47.000000\n",
       "mean      711.787234\n",
       "std       677.696191\n",
       "min         7.000000\n",
       "25%       244.000000\n",
       "50%       433.000000\n",
       "75%      1040.500000\n",
       "max      2862.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of chunk length (in characters) for the Markdown sections.\n",
    "pd.Series([len(d.page_content) for d in md_splitted_docs]).describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7872f43-d308-4eed-9dc0-9ef73cd96ba9",
   "metadata": {},
   "source": [
    "## 检查超长块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "a0370f48-6a02-4aac-a841-5a911182a4af",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.890225Z",
     "iopub.status.busy": "2024-12-04T15:04:22.889994Z",
     "iopub.status.idle": "2024-12-04T15:04:22.895312Z",
     "shell.execute_reply": "2024-12-04T15:04:22.894587Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.890202Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='# （一）全球经济将在波动分化中筑底复苏  \n",
      "2023年，全球经济增长动力持续回落，经济增速连续两年下降。受地缘政治冲突、高通胀、货币政策紧缩等因素影响，全球经济下行压力加大。预计2023年全球GDP增速为 $2.7\\%$ （市场汇率法），较2022年下降0.3个百分点。  \n",
      "![](images/7600acb45b91442f8127f20629c791d91f04827835929cb12612c409fde82574.jpg)\n",
      "图1：全球GDP增速 $(\\%)$ ）  \n",
      "资料来源：IMF，中国银行研究院  \n",
      "分区域看，全球经济复苏不均衡，各国存在较大差异。发达经济体增速明显放缓，预计2023年增速较2022年下降1个百分点。其中，欧元区和英国经济增速大幅下降，美国表现好于其他发达经济体。2023年三季度，欧元区和英国GDP环比增速均由之前的正增长转为负增长，分别下降 $0.1\\%$ 和 $0.03\\%$ ；美国GDP环比增长折年率为 $4.9\\%$ ，比二季度增速高2.8个百分点。新兴经济体增速与2022年大致持平，预计2023年增速比2022年下降0.1个百分点。其中，东南亚等出口型经济体增长承压，拉美、非洲等大宗商品出口国增速放缓，中东欧国家经济增速加快（图2）。  \n",
      "![](images/abf30ccab508a0c4733d58e3810cda53dabdaeb4239acf37e57a931a0296d80c.jpg)\n",
      "图2：主要经济体GDP增速变化趋势（%）\n",
      "注：东盟五国包含印度尼西亚、马来西亚、菲律宾、新加坡和泰国。  \n",
      "资料来源：IMF，中国银行研究院  \n",
      "从生产端看，全球供应链持续恢复，但生产景气度逐渐回落。截至2023年10月底，纽约联储全球供应链压力指数降至有记录以来的最低值。荷兰经济分析局数据显示，全球工业生产量于4月触及年内低位，5-8月逐月回升，但发达经济体和新兴经济体分化明显（图3）。其中，主要新兴经济体工业生产指数普遍走高，如俄罗斯、土耳其、南非等，而发达经济体中的美国和韩国回升，英国、德国、意大利下行，日本波动较大，整体趋于平稳。全球融资环境收紧和经济下行压力对工业生产前景带来较大影响，全球制造业PMI指数明显回落，从2月的 $49.9\\%$ 降至10月的 $48.8\\%$ 。  \n",
      "![](images/7d2b17776c10d8fc38a113a20b40791a9e65da33b4209516d0bde88163bee3ea.jpg)\n",
      "图3：部分经济体工业生产指数变化趋势（2010年 $\\mathbf{-100}\\rangle$ ）\n",
      "资料来源：荷兰经济分析局，中国银行研究院  \n",
      "从需求端看，内需是支撑发达经济体增长的主要动力，但对经济的拉动作用逐渐减弱。美国消费未受加息明显影响，私人消费维持稳定增长，前三季度对美国经济增长的贡献率高达 $64.4\\%$ ；8-9月，美国零售和食品销售额连续两个月环比增速保持在 $0.7\\%$ 以上，高于市场预期，但10月增速大幅回落至 $-0.1\\%$ 。欧洲各国消费指数整体维持稳定（图4），是上半年免于陷入衰退的主要动力。但随着高利率和高通胀持续，对消费的影响逐渐释放，内需增长动力逐渐弱化，全球服务业PMI指数从二季度开始明显回落，从5月的 $55.5\\%$ 降至10月的 $50.4\\%$ 连续5个月下行；OECD消费者信心指数从7月开始连续3个月回落。  \n",
      "![](images/3876098e7c8b21ca208f46cd2b25aa420574a706ae2648c774fcf130fac892db.jpg)\n",
      "图4：部分欧洲国家零售销售指数\n",
      "注：除英国是以2019年为基年外，其他经济体均为2015年为基年。资料来源：Wind，中国银行研究院  \n",
      "发达经济体投资受加息政策影响较大，国内投资和跨境投资均持续承压。美国私人投资在2023年一季度触底后逐渐反弹，三季度存货及住宅投资恢复增长，带动私人投资增速提升至 $8.4\\%$ （经季调后环比折年率），但制造业和设备投资均放缓，环比增长折年率分别降低 $0.1\\%$ 和 $3.8\\%$ 。欧盟投资增速放缓，房地产投资减少。2023年二季度，欧元区固定资本形成总额环比增长 $0.1\\%$ ，比一季度增速下降0.3个百分点，房地产对GDP环比增长拉动率转为负值。在紧缩货币政策影响下，发达经济体企业部门宏观杠杆率下降，企业加杠杆或负债投资意愿不足。同2022年底相比，2023年二季度，美国、英国、法国、意大利和德国非金融企业部门负债率分别下降了2.4个、3.4个、4.0个、3.0个和1.3个百分点（图5）。IMF预测2023年全球投资率（投资占GDP的比重）将下降1.0个百分点至 $26.4\\%$ （图6），其中，欧盟将下降1.1个百分点，比发达经济体平均降幅高0.2个百分点。从跨境投资角度看，受地缘政治局势紧张、金融领域动荡加剧、高利率和投资审查趋严等影响，并购交易仍然疲软，而在全球产业链重塑背景下，东南亚等区域绿地投资恢复增长。联合国贸发会议预计2023年全球跨境直接投资将继续下行，但降幅较2022年收窄。  \n",
      "![](images/876898312b7f8b55b06bc9b09f7a585aebeb663d67fe281dfb4ed939588a8d6e.jpg)\n",
      "图5：部分发达国家非金融企业部门债务率（%）\n",
      "资料来源：IIF，中国银行研究院\n",
      "图6：全球投资率变化趋势 $(\\,\\%)$ ）  \n",
      "![](images/5c049cdfb254bf5b7720e25c09f7e2e434c5c77b74897dfb28ee5154f4ef318e.jpg)\n",
      "资料来源：IMF，中国银行研究院  \n",
      "从国际贸易角度看，全球货物贸易量和价格指数均承压下行，主要经济体出口贸易同比增速下降。荷兰经济分析局数据显示，2023年1-8月，全球货物贸易量指数和价格指数均震荡下行，8月数值比1月分别下降0.9个和4.3个点（图7）。10月，世贸组织将2023年全球货物贸易增速预测值下调0.9个百分点至 $0.8\\%$ ，2023年国际贸易增长或为近几年最低水平。但近期东亚、东南亚等主要经济体出口下行趋势收窄，贸易呈现企稳迹象。9-10月，越南出口结束连续10个月的负增长态势，同比分别增长 $5.0\\%$ 和 $6.7\\%$ 。10月，韩国出口同比增长 $5.1\\%$ ，是自2022年10月以来首次正增长。  \n",
      "![](images/5826ae44f43ef12c95089d898a8b9375a7e989c7ba7a6de6388cbdd174b65516.jpg)\n",
      "图7：全球货物贸易量指数和货物贸易价格指数（2010年 $\\mathbf{-100}.$ ）\n",
      "资料来源：荷兰经济分析局，中国银行研究院' metadata={'Header 1': '（一）全球经济将在波动分化中筑底复苏'}\n"
     ]
    }
   ],
   "source": [
    "# Print only the first Markdown section longer than 2000 characters, as a sample of an oversized chunk.\n",
    "first_oversized = next((d for d in md_splitted_docs if len(d.page_content) > 2000), None)\n",
    "if first_oversized is not None:\n",
    "    print(first_oversized)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ec5e814-0a7e-4910-b3d1-e56343aded72",
   "metadata": {},
   "source": [
    "考虑到有不少切片依然非常大，此处对较大的片段做二次切分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "44de0a4c-df8f-4cd2-84ab-32d7c56df772",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.896286Z",
     "iopub.status.busy": "2024-12-04T15:04:22.896082Z",
     "iopub.status.idle": "2024-12-04T15:04:22.905959Z",
     "shell.execute_reply": "2024-12-04T15:04:22.905405Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.896266Z"
    }
   },
   "outputs": [],
   "source": [
    "from langchain.text_splitter import MarkdownTextSplitter\n",
    "\n",
    "# Sections longer than 700 characters are split a second time into pieces of roughly\n",
    "# 500 characters with 50 characters of overlap; shorter sections are kept unchanged.\n",
    "new_md_splitted_docs = []\n",
    "splitter = MarkdownTextSplitter(\n",
    "    chunk_size=500,\n",
    "    chunk_overlap=50\n",
    ")\n",
    "for doc in md_splitted_docs:\n",
    "    if len(doc.page_content) > 700:\n",
    "        small_chunks = splitter.split_documents([doc])\n",
    "        # Prepend the section headers (taken from the chunk metadata) to the text of every piece\n",
    "        # after the first. The first piece is skipped, presumably because it still starts with\n",
    "        # the header line kept by the header split.\n",
    "        # The loop variable is named small_chunk so it does not overwrite the outer `doc`.\n",
    "        for small_chunk in small_chunks[1:]:\n",
    "            header_prefix = ''.join(\n",
    "                '#' * head_level + ' ' + small_chunk.metadata[f'Header {head_level}'] + '\\n'\n",
    "                for head_level in range(1, 4)\n",
    "                if f'Header {head_level}' in small_chunk.metadata\n",
    "            )\n",
    "            small_chunk.page_content = header_prefix + small_chunk.page_content\n",
    "\n",
    "        new_md_splitted_docs.extend(small_chunks)\n",
    "    else:\n",
    "        new_md_splitted_docs.append(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "d1ef80c2-b46c-4a4c-abcb-8e1c9dfc836e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.906673Z",
     "iopub.status.busy": "2024-12-04T15:04:22.906517Z",
     "iopub.status.idle": "2024-12-04T15:04:22.913499Z",
     "shell.execute_reply": "2024-12-04T15:04:22.913080Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.906658Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "102"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(new_md_splitted_docs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "220dbc3a-fceb-4e49-a3f1-01e16660b2a6",
   "metadata": {
    "papermill": {
     "duration": 0.100209,
     "end_time": "2024-11-23T14:29:05.255871",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.155662",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 检索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "8598a11c-25d8-4af1-a98b-06a8c394e261",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:22.914206Z",
     "iopub.status.busy": "2024-12-04T15:04:22.914051Z",
     "iopub.status.idle": "2024-12-04T15:04:23.768435Z",
     "shell.execute_reply": "2024-12-04T15:04:23.767980Z",
     "shell.execute_reply.started": "2024-12-04T15:04:22.914191Z"
    },
    "papermill": {
     "duration": 0.989203,
     "end_time": "2024-11-23T14:29:06.345534",
     "exception": false,
     "start_time": "2024-11-23T14:29:05.356331",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "device: cuda\n"
     ]
    }
   ],
   "source": [
    "# HuggingFaceBgeEmbeddings lives in langchain_community; the langchain.embeddings path is a deprecated re-export.\n",
    "from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "import torch\n",
    "\n",
    "# Run the embedding model on the GPU when torch reports one, otherwise on the CPU.\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
    "print(f'device: {device}')\n",
    "\n",
    "def get_embeddings(model_path):\n",
    "    \"\"\"Create a HuggingFaceBgeEmbeddings object for the given model.\n",
    "\n",
    "    Args:\n",
    "        model_path: Value passed as ``model_name`` (a model id or path).\n",
    "\n",
    "    Returns:\n",
    "        The embeddings object, configured to run on the module-level ``device``,\n",
    "        to normalize its output vectors, and to use a Chinese retrieval\n",
    "        instruction as ``query_instruction``.\n",
    "    \"\"\"\n",
    "    embeddings = HuggingFaceBgeEmbeddings(\n",
    "        model_name=model_path,\n",
    "        model_kwargs={'device': device},\n",
    "        encode_kwargs={'normalize_embeddings': True},\n",
    "        # show_progress=True\n",
    "        query_instruction='为这个句子生成表示以用于检索相关文章：'\n",
    "    )\n",
    "    return embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "f6f46c73-7369-448f-a89a-ed3d817cad47",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:23.769246Z",
     "iopub.status.busy": "2024-12-04T15:04:23.769026Z",
     "iopub.status.idle": "2024-12-04T15:04:26.757680Z",
     "shell.execute_reply": "2024-12-04T15:04:26.757194Z",
     "shell.execute_reply.started": "2024-12-04T15:04:23.769233Z"
    },
    "papermill": {
     "duration": 83.983138,
     "end_time": "2024-11-23T14:35:06.117207",
     "exception": false,
     "start_time": "2024-11-23T14:33:42.134069",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "from tqdm.auto import tqdm\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "# Embedding model identifier; get_embeddings passes it on as model_name.\n",
    "model_path = 'BAAI/bge-large-zh-v1.5'\n",
    "embeddings = get_embeddings(model_path)\n",
    "\n",
    "def get_vector_db(splitted_docs, embeddings, name):\n",
    "    \"\"\"Build a Chroma vector store for the given chunks under the experiment directory.\n",
    "\n",
    "    The store is persisted to ``<expr_dir>/chroma/bge/<name>``. Any existing\n",
    "    directory at that path is deleted first (errors during deletion are ignored).\n",
    "\n",
    "    Args:\n",
    "        splitted_docs: Documents to index.\n",
    "        embeddings: Embedding object passed to Chroma as ``embedding``.\n",
    "        name: Sub-directory name that distinguishes this store from others.\n",
    "\n",
    "    Returns:\n",
    "        The vector store returned by ``Chroma.from_documents``.\n",
    "    \"\"\"\n",
    "    store_dir = os.path.join(expr_dir, 'chroma', 'bge', name)\n",
    "    # Remove a previous store at this path -- presumably so a re-run does not index the same chunks twice.\n",
    "    shutil.rmtree(store_dir, ignore_errors=True)\n",
    "    return Chroma.from_documents(\n",
    "        splitted_docs,\n",
    "        embedding=embeddings,\n",
    "        persist_directory=store_dir\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3318f9bb-a7f8-4c44-bf8d-302b71dca44c",
   "metadata": {},
   "source": [
    "使用新的切分方式，每个切片的UUID跟原始切片不一致了，检索的Ground Truth丢失了，此处通过向量检索的方式，将原始的UUID复制到Markdown的切片上，方便后续排查检索问题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "b48362cc-5776-4f1c-8feb-64b1a4a675e8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:26.758442Z",
     "iopub.status.busy": "2024-12-04T15:04:26.758197Z",
     "iopub.status.idle": "2024-12-04T15:04:43.940577Z",
     "shell.execute_reply": "2024-12-04T15:04:43.938191Z",
     "shell.execute_reply.started": "2024-12-04T15:04:26.758429Z"
    }
   },
   "outputs": [],
   "source": [
    "# Vector store over the PDF chunks; it is queried below to map Markdown chunks back to PDF chunk UUIDs.\n",
    "pdf_vector_db = get_vector_db(pdf_splitted_docs, embeddings, 'pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "dabf2d44-5afa-41f4-bd6c-1cbaaf00e571",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:43.943452Z",
     "iopub.status.busy": "2024-12-04T15:04:43.942657Z",
     "iopub.status.idle": "2024-12-04T15:04:49.322058Z",
     "shell.execute_reply": "2024-12-04T15:04:49.321576Z",
     "shell.execute_reply.started": "2024-12-04T15:04:43.943381Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1294088/1778381455.py:5: UserWarning: Relevance scores must be between 0 and 1, got [(Document(metadata={'page': 46, 'source': 'data/2024全球经济金融展望报告.pdf', 'uuid': '873ee876-8549-49b9-b182-6584113b2701'}, page_content='全球经济金融展望报告\\n中国银行研究院 45 2024年\\n第四，中海深化经贸合作将为人民币国际化搭建新的平台。“石油美元”\\n体系对于非美元国家能源贸易存在不同程度的制约。在该体系下，中东石油出\\n口国需要在一定程度上放弃独立的货币政策，承担较大的汇率风险。尤其是近\\n年来美元逐渐被“政治化”，各国对于贸易计价与结算货币多元化的需求不断\\n增加。当前，中国与海湾六国拥有紧密的经贸联系，并且随着19个阿拉伯国家\\n相继加入“一带一路”倡议，人民币使用场景不断拓宽，为人民币国际化创造\\n了有利条件。未来，中海有望以双边经贸为依托，将人民币跨境贸易支付作为\\n人民币在中东地区使用的主要切入点，扩展与海湾六国在人民币储备和投资等\\n领域的货币合作，共同开发绿色金融，充分释放中海双方在推动人民币国际化\\n上的合作潜力。\\n专题二：高利率、高债务与美国房地产市场脆弱性\\n近期，美联储考察了影响金融稳定的风险因素，商业和居住房地产风险的\\n排名从2023年5月的第四位上升至10月的第二位，仅次于持续通胀与货币政\\n策紧缩风险。美国房地产市场的演进路径及风险传染引发市场高度关注。\\n（一）居住房地产市场\\n2023年二季度，美国居住房地产市值余额为56.3万亿美元，不仅远大于商\\n业房地产的24万亿美元，且在各类资产中排名第一。房地产在居民净资产中占\\n比达30%，房贷在居民债务中占比达66%，该市场的走势对美国金融市场、经\\n济走势影响巨大。鉴于该市场对利率及债务高度敏感，美联储加息及居民债务\\n负担上升将增加金融脆弱性。2022年3月以来，美联储激进加息，30年期住房\\n抵押贷款固定利率月均值从4%的历史低位逐渐上升至2023年10月的7.6%，远\\n高于次贷危机发生之前的6.5%。与次贷危机相比，本轮货币紧缩周期中居住房\\n地产市场整体相对稳定，但潜在风险可能上升。\\n第一，在疫情发生后，美国居住房地产市场需求持续上升。美联储对银行'), -0.05529412693199687)]\n",
      "  chunk_score_pair = pdf_vector_db.similarity_search_with_relevance_scores(query, k=1)[0]\n",
      "/tmp/ipykernel_1294088/1778381455.py:5: UserWarning: Relevance scores must be between 0 and 1, got [(Document(metadata={'page': 33, 'source': 'data/2024全球经济金融展望报告.pdf', 'uuid': '3e312dfa-dd43-4ab9-961f-a2e442c89cdd'}, page_content='全球经济金融展望报告\\n中国银行研究院 32 2024年\\n图19：美国联邦基金目标利率与全球MSCI指数\\n资料来源：Wind，中国银行研究院\\n表3：全球主要股指概览\\n注：涨跌幅区间为2023年1月1日至2023年11月15日，收盘价和市盈\\n率为2023年11月15日。\\n资料来源：Wind，中国银行研究院'), -0.04982325594463921)]\n",
      "  chunk_score_pair = pdf_vector_db.similarity_search_with_relevance_scores(query, k=1)[0]\n"
     ]
    }
   ],
   "source": [
    "# Use each new Markdown chunk as the query, find the most similar PDF chunk,\n",
    "# and copy that PDF chunk's UUID onto the Markdown chunk.\n",
    "for doc in new_md_splitted_docs:\n",
    "    query = doc.page_content\n",
    "    # Retrieve only the single most similar chunk (k=1); the result is a (Document, score) tuple.\n",
    "    # NOTE(review): the recorded run warned that some relevance scores fell outside [0, 1],\n",
    "    # so pdf_chunk_sim can be negative -- treat it as a rough diagnostic only.\n",
    "    chunk_score_pair = pdf_vector_db.similarity_search_with_relevance_scores(query, k=1)[0]\n",
    "    doc.metadata['uuid'] = chunk_score_pair[0].metadata['uuid']\n",
    "    doc.metadata['pdf_chunk_sim'] = chunk_score_pair[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5a655299-15f5-44b1-925d-5137a1e1c881",
   "metadata": {},
   "source": [
    "chunk_score_pair的结构如下"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "46817374-ceb2-486a-a7f2-240c2abc98f0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:49.322919Z",
     "iopub.status.busy": "2024-12-04T15:04:49.322597Z",
     "iopub.status.idle": "2024-12-04T15:04:49.325729Z",
     "shell.execute_reply": "2024-12-04T15:04:49.325426Z",
     "shell.execute_reply.started": "2024-12-04T15:04:49.322906Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Document(metadata={'page': 51, 'source': 'data/2024全球经济金融展望报告.pdf', 'uuid': 'ebf0d999-59f6-4fd3-941e-05a7a60c255a'}, page_content='免责声明\\n本研究报告由中国银行研究院撰写，研究报告中所引用信息均来自公开资料。\\n本研究报告中包含的观点或估计仅代表作者迄今为止的判断，它们不一定反映中国银行的观点。中国\\n银行研究院可以不经通知加以改变，且没有对此报告更新、修正或修改的责任。\\n本研究报告内容及观点仅供参考，不构成任何投资建议。对于本报告所提供信息所导致的任何直接的\\n或者间接的投资盈亏后果不承担任何责任。\\n本研究报告版权仅为中国银行研究院所有，未经书面许可，任何机构和个人不得以任何形式翻版、复\\n制和发布。如引用发布，需注明出处为中国银行研究院，且不得对本报告进行有悖原意的引用、删节和修\\n改。中国银行研究院保留对任何侵权行为和有悖报告原意的引用行为进行追究的权利。'),\n",
       " 0.8652316217743071)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the (Document, score) pair left over from the last iteration of the UUID-copy loop.\n",
    "chunk_score_pair"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "62560d74-7d90-4e69-ae43-162b248e1622",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:04:49.327946Z",
     "iopub.status.busy": "2024-12-04T15:04:49.327820Z",
     "iopub.status.idle": "2024-12-04T15:05:07.975512Z",
     "shell.execute_reply": "2024-12-04T15:05:07.975066Z",
     "shell.execute_reply.started": "2024-12-04T15:04:49.327934Z"
    }
   },
   "outputs": [],
   "source": [
    "# Vector store over the re-split Markdown chunks.\n",
    "md_vector_db = get_vector_db(new_md_splitted_docs, embeddings, 'md')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "55d51ebc-b29d-45be-b8c7-1d5610b270b8",
   "metadata": {},
   "source": [
    "# 计算检索准确率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "ad8ef473-7ad8-43d4-8b9a-9890cf3bf4c6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:07.976081Z",
     "iopub.status.busy": "2024-12-04T15:05:07.975945Z",
     "iopub.status.idle": "2024-12-04T15:05:07.979486Z",
     "shell.execute_reply": "2024-12-04T15:05:07.978973Z",
     "shell.execute_reply.started": "2024-12-04T15:05:07.976068Z"
    }
   },
   "outputs": [],
   "source": [
    "# Evaluate only rows whose dataset is 'test' and whose qa_type is 'detailed'.\n",
    "test_df = qa_df[(qa_df['dataset'] == 'test') & (qa_df['qa_type'] == 'detailed')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "070b78ef-3140-4e59-886c-09c5184a8ee9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:07.980184Z",
     "iopub.status.busy": "2024-12-04T15:05:07.980055Z",
     "iopub.status.idle": "2024-12-04T15:05:07.993382Z",
     "shell.execute_reply": "2024-12-04T15:05:07.992935Z",
     "shell.execute_reply.started": "2024-12-04T15:05:07.980172Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "93"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(test_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "435148a0-b2b1-49fb-8eea-2ad117c0b9d4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:07.994058Z",
     "iopub.status.busy": "2024-12-04T15:05:07.993886Z",
     "iopub.status.idle": "2024-12-04T15:05:07.999011Z",
     "shell.execute_reply": "2024-12-04T15:05:07.998580Z",
     "shell.execute_reply.started": "2024-12-04T15:05:07.994047Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_hit_stat_df(vector_db, top_k_arr=list(range(1, 9))):\n",
    "    hit_stat_data = []\n",
    "\n",
    "    for k in tqdm(top_k_arr):\n",
    "        for idx, row in test_df.iterrows():\n",
    "            question = row['question']\n",
    "            true_uuid = row['uuid']\n",
    "            # chunks = retrieve_fn(question, k=k)\n",
    "            chunks = vector_db.similarity_search(question, k=k)\n",
    "            retrieved_uuids = [doc.metadata['uuid'] for doc in chunks]\n",
    "\n",
    "            hit_stat_data.append({\n",
    "                'question': question,\n",
    "                'top_k': k,\n",
    "                'hit': int(true_uuid in retrieved_uuids),\n",
    "                'retrieved_chunks': len(chunks)\n",
    "            })\n",
    "    hit_stat_df = pd.DataFrame(hit_stat_data)\n",
    "    return hit_stat_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "01e01af2-9f53-462a-bcb1-2864864e6488",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:07.999631Z",
     "iopub.status.busy": "2024-12-04T15:05:07.999462Z",
     "iopub.status.idle": "2024-12-04T15:05:27.954039Z",
     "shell.execute_reply": "2024-12-04T15:05:27.953567Z",
     "shell.execute_reply.started": "2024-12-04T15:05:07.999619Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9b0ca1ad158d40a69a31198eda743aba",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "hit_stat_df = get_hit_stat_df(md_vector_db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "de0c3de0-92b5-4804-a374-108984640cf8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:27.954632Z",
     "iopub.status.busy": "2024-12-04T15:05:27.954500Z",
     "iopub.status.idle": "2024-12-04T15:05:27.961318Z",
     "shell.execute_reply": "2024-12-04T15:05:27.960990Z",
     "shell.execute_reply.started": "2024-12-04T15:05:27.954620Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>top_k</th>\n",
       "      <th>hit_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0.397849</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0.483871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0.516129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>0.602151</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0.634409</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>0.677419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>0.709677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>0.731183</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   top_k  hit_rate\n",
       "0      1  0.397849\n",
       "1      2  0.483871\n",
       "2      3  0.516129\n",
       "3      4  0.602151\n",
       "4      5  0.634409\n",
       "5      6  0.677419\n",
       "6      7  0.709677\n",
       "7      8  0.731183"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hit_stat_df.groupby(['top_k'])['hit'].mean().reset_index().rename(columns={'hit': 'hit_rate'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "ccc0dca9-8ad6-4d0c-a6e1-8279babbdfbf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:27.961857Z",
     "iopub.status.busy": "2024-12-04T15:05:27.961733Z",
     "iopub.status.idle": "2024-12-04T15:05:28.325526Z",
     "shell.execute_reply": "2024-12-04T15:05:28.325179Z",
     "shell.execute_reply.started": "2024-12-04T15:05:27.961845Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='top_k', ylabel='hit'>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGxCAYAAACeKZf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAmTElEQVR4nO3df3BU9b3/8ddmIQkIBDEkgRhJASUEJbGJSQNXobo2Fx0r9956o4MmXWnufCtrY/erX4i0iaiwWDWGQYYIEvFqKVgv/ugV44/VYKmxwSAtVEVphURhEzLWBKMmurvfPxzXbgkYMNmz+eT5mDkz7GfPyb5PHeuTs2eztmAwGBQAAIAhYqweAAAAoD8RNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMMszqASItEAjo0KFDGj16tGw2m9XjAACAPggGgzp69KgmTpyomJgTX5sZcnFz6NAhpaWlWT0GAAA4BS0tLTrzzDNPuM+Qi5vRo0dL+vJ/nDFjxlg8DQAA6IvOzk6lpaWF/jt+IkMubr56K2rMmDHEDQAAg0xfbinhhmIAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYZZvUAAACgf9x2221Wj9Avvu15cOUGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGCUqfkPxmjVrdPfdd8vn8ykrK0urV69WXl5er/vOnTtX27dvP2b9sssu0zPPPDPQowIABoG3lr9k9Qj9YvrSi60eYVCy/MrNli1b5Ha7VVlZqV27dikrK0uFhYVqa2vrdf+tW7fq8OHDoW3v3r2y2+266qqrIjw5AACIRpbHTVVVlUpLS+V0OpWZmamamhqNHDlStbW1ve4/btw4paSkhLYXXnhBI0eOJG4AAIAki+Omp6dHTU1NcjgcobWYmBg5HA41NDT06Wds2LBBV199tU477bSBGhMAAAwilt5z097eLr/fr+Tk5LD15ORkvf322994fGNjo/bu3asNGzYcd5/u7m51d3eHHnd2dp76wAAAIOpZ/rbUt7Fhwwadd955x735WJI8Ho8SEhJCW1paWgQnBAAAkWZp3CQmJsput6u1tTVsvbW1VSkpKSc8tqurS5s3b9bChQtPuF95ebk6OjpCW0tLy7eeGwAARC9L4yY2NlY5OTnyer2htUAgIK/Xq4KCghMe+9vf/lbd3d269tprT7hfXFycxowZE7YBAABzWf57btxut0pKSpSbm6u8vDxVV1erq6tLTqdTklRcXKzU1FR5PJ6w4zZs2KD58+frjDPOsGJsAAAQpSyPm6KiIh05ckQVFRXy+XzKzs5WXV1d6Cbj5uZmxcSEX2Dat2+fduzYoeeff96KkQEAQBSzPG4kyeVyyeVy9fpcfX39MWvTpk1TMBgc4KkAAMBgNKg/LQUAAPDPiBsAAGAU4gYAABiFuAEAAEaJihuKAQADY/m1P7J6hH6x9NHHrR4BgwhXbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGGWb1AAAQCff/399ZPUK/cN17hdUjAFGPKzcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMYnncrFmzRunp6YqPj1d+fr4aGxtPuP9HH32kRYsWacKECYqLi9M555yjbdu2RWhaAAAQ7Sz94swtW7bI7XarpqZG+fn5qq6uVmFhofbt26ekpKRj9u/p
6dGll16qpKQkPf7440pNTdXBgwc1duzYyA8PAACikqVxU1VVpdLSUjmdTklSTU2NnnnmGdXW1mrJkiXH7F9bW6sPP/xQr776qoYPHy5JSk9Pj+TIAAAgyln2tlRPT4+amprkcDi+HiYmRg6HQw0NDb0e8/TTT6ugoECLFi1ScnKyzj33XK1YsUJ+vz9SYwMAgChn2ZWb9vZ2+f1+JScnh60nJyfr7bff7vWYv/3tb3rppZe0YMECbdu2Tfv379cNN9ygzz//XJWVlb0e093dre7u7tDjzs7O/jsJYBDaftEcq0foF3Ne2W71CACilOU3FJ+MQCCgpKQkrVu3Tjk5OSoqKtLSpUtVU1Nz3GM8Ho8SEhJCW1paWgQnBgAAkWZZ3CQmJsput6u1tTVsvbW1VSkpKb0eM2HCBJ1zzjmy2+2htenTp8vn86mnp6fXY8rLy9XR0RHaWlpa+u8kAABA1LEsbmJjY5WTkyOv1xtaCwQC8nq9Kigo6PWY2bNna//+/QoEAqG1d955RxMmTFBsbGyvx8TFxWnMmDFhGwAAMJelb0u53W6tX79eDz/8sN566y399Kc/VVdXV+jTU8XFxSovLw/t/9Of/lQffvihysrK9M477+iZZ57RihUrtGjRIqtOAQAARBlLPwpeVFSkI0eOqKKiQj6fT9nZ2aqrqwvdZNzc3KyYmK/7Ky0tTc8995x+/vOfa+bMmUpNTVVZWZkWL15s1SkAAIAoY2ncSJLL5ZLL5er1ufr6+mPWCgoK9Nprrw3wVAAAYLAaVJ+WAgAA+CbEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADDKMKsHAKwye/Vsq0foF3+48Q9WjwAAUYUrNwAAwCjEDQAAMEpUxM2aNWuUnp6u+Ph45efnq7Gx8bj7bty4UTabLWyLj4+P4LQAACCaWR43W7ZskdvtVmVlpXbt2qWsrCwVFhaqra3tuMeMGTNGhw8fDm0HDx6M4MQAACCaWR43VVVVKi0tldPpVGZmpmpqajRy5EjV1tYe9xibzaaUlJTQlpycHMGJAQBANLM0bnp6etTU1CSHwxFai4mJkcPhUENDw3GP+/jjjzVp0iSlpaXpyiuv1F/+8pdIjAsAAAYBS+Omvb1dfr//mCsvycnJ8vl8vR4zbdo01dbW6qmnntKjjz6qQCCgWbNm6f333+91/+7ubnV2doZtAADAXJa/LXWyCgoKVFxcrOzsbM2ZM0dbt27V+PHj9cADD/S6v8fjUUJCQmhLS0uL8MQAACCSLI2bxMRE2e12tba2hq23trYqJSWlTz9j+PDhOv/887V///5eny8vL1dHR0doa2lp+dZzAwCA6GVp3MTGxionJ0derze0FggE5PV6VVBQ0Kef4ff7tWfPHk2YMKHX5+Pi4jRmzJiwDQAAmMvyr19wu90qKSlRbm6u8vLyVF1dra6uLjmdTklScXGxUlNT5fF4JEm33367vve972nq1Kn66KOPdPfdd+vgwYP6yU9+YuVpAACAKGF53BQVFenIkSOqqKiQz+dTdna26urqQjcZNzc3Kybm6wtMf//731VaWiqfz6fTTz9dOTk5evXVV5WZmWnVKQAAgChiedxIksvlksvl6vW5+vr6sMf33Xef7rvvvghMBQAABqNB92kpAACAEyFuAACAUaLibSlYq/n286weoV+cVbHH6hEAAFGAKzcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACM
QtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCjEDQAAMApxAwAAjELcAAAAoxA3AADAKMQNAAAwCnEDAACMEhVxs2bNGqWnpys+Pl75+flqbGzs03GbN2+WzWbT/PnzB3ZAAAAwaFgeN1u2bJHb7VZlZaV27dqlrKwsFRYWqq2t7YTHHThwQDfffLMuvPDCCE0KAAAGA8vjpqqqSqWlpXI6ncrMzFRNTY1Gjhyp2tra4x7j9/u1YMECLVu2TJMnT47gtAAAINpZGjc9PT1qamqSw+EIrcXExMjhcKihoeG4x91+++1KSkrSwoULv/E1uru71dnZGbYBAABzWRo37e3t8vv9Sk5ODltPTk6Wz+fr9ZgdO3Zow4YNWr9+fZ9ew+PxKCEhIbSlpaV967kBAED0Gmb1ACfj6NGjuu6667R+/XolJib26Zjy8nK53e7Q487OzuMGTs4t/90vc1qt6e5iq0cAAMAypxQ3F198sbZu3aqxY8eGrXd2dmr+/Pl66aWX+vRzEhMTZbfb1draGrbe2tqqlJSUY/b/61//qgMHDuiKK64IrQUCAUnSsGHDtG/fPk2ZMiXsmLi4OMXFxfVpHgAAMPid0ttS9fX16unpOWb9s88+0+9///s+/5zY2Fjl5OTI6/WG1gKBgLxerwoKCo7ZPyMjQ3v27NHu3btD2w9/+EN9//vf1+7du3nLCQAAnNyVmz//+c+hP7/55pth98X4/X7V1dUpNTX1pAZwu90qKSlRbm6u8vLyVF1dra6uLjmdTklScXGxUlNT5fF4FB8fr3PPPTfs+K+uHv3zOgAAGJpOKm6ys7Nls9lks9l08cUXH/P8iBEjtHr16pMaoKioSEeOHFFFRYV8Pp+ys7NVV1cXusm4ublZMTGWf2IdAAAMEicVN++9956CwaAmT56sxsZGjR8/PvRcbGyskpKSZLfbT3oIl8sll8vV63P19fUnPHbjxo0n/XoAAMBcJxU3kyZNkvT1TbwAAADRps9x8/TTT2vevHkaPny4nn766RPu+8Mf/vBbDwYAAHAq+hw38+fPl8/nU1JS0gm/qNJms8nv9/fHbAAAACetz3Hzj29F8bYUAACIVqf8G4q9Xq+8Xq/a2trCYsdms2nDhg39MhwAAMDJOqW4WbZsmW6//Xbl5uZqwoQJstls/T0XAADAKTmluKmpqdHGjRt13XXX9fc8AAAA38op/Xa8np4ezZo1q79nAQAA+NZOKW5+8pOfaNOmTf09CwAAwLfW57el3G536M+BQEDr1q3Tiy++qJkzZ2r48OFh+1ZVVfXfhAAAACehz3HzxhtvhD3Ozs6WJO3duzdsnZuLAQCAlfocNy+//PJAzgEAANAv+LptAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABglKiImzVr1ig9PV3x8fHKz89XY2PjcffdunWrcnNzNXbsWJ122mnKzs7WI488EsFpAQBANLM8brZs2SK3263Kykrt2rVLWVlZKiwsVFtbW6/7jxs3TkuXLlVDQ4P+/Oc/y+l0yul06rnnnovw5AAAIBpZHjdVVVUqLS2V0+lUZmamampqNHLkSNXW1va6/9y5c/Vv//Zvmj59uqZMmaKysjLNnDlTO3bsiPDkAAAgGlkaNz09PWpqapLD4QitxcTEyOFwqKGh4RuPDwaD8nq92rdvny666KJe9+nu7lZnZ2fYBgAAzGVp3LS3t8vv9ys5OTlsPTk5WT6f77jHdXR0aNSo
UYqNjdXll1+u1atX69JLL+11X4/Ho4SEhNCWlpbWr+cAAACii+VvS52K0aNHa/fu3dq5c6eWL18ut9ut+vr6XvctLy9XR0dHaGtpaYnssAAAIKKGWfniiYmJstvtam1tDVtvbW1VSkrKcY+LiYnR1KlTJUnZ2dl666235PF4NHfu3GP2jYuLU1xcXL/ODQAAopelV25iY2OVk5Mjr9cbWgsEAvJ6vSooKOjzzwkEAuru7h6IEQEAwCBj6ZUbSXK73SopKVFubq7y8vJUXV2trq4uOZ1OSVJxcbFSU1Pl8XgkfXkPTW5urqZMmaLu7m5t27ZNjzzyiNauXWvlaQAAgChhedwUFRXpyJEjqqiokM/nU3Z2turq6kI3GTc3Nysm5usLTF1dXbrhhhv0/vvva8SIEcrIyNCjjz6qoqIiq04BAABEEcvjRpJcLpdcLlevz/3zjcJ33nmn7rzzzghMBQAABqNB+WkpAACA4yFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARomKuFmzZo3S09MVHx+v/Px8NTY2Hnff9evX68ILL9Tpp5+u008/XQ6H44T7AwCAocXyuNmyZYvcbrcqKyu1a9cuZWVlqbCwUG1tbb3uX19fr2uuuUYvv/yyGhoalJaWph/84Af64IMPIjw5AACIRpbHTVVVlUpLS+V0OpWZmamamhqNHDlStbW1ve7/61//WjfccIOys7OVkZGhBx98UIFAQF6vN8KTAwCAaGRp3PT09KipqUkOhyO0FhMTI4fDoYaGhj79jE8++USff/65xo0b1+vz3d3d6uzsDNsAAIC5LI2b9vZ2+f1+JScnh60nJyfL5/P16WcsXrxYEydODAukf+TxeJSQkBDa0tLSvvXcAAAgeln+ttS3sXLlSm3evFlPPPGE4uPje92nvLxcHR0doa2lpSXCUwIAgEgaZuWLJyYmym63q7W1NWy9tbVVKSkpJzz2nnvu0cqVK/Xiiy9q5syZx90vLi5OcXFx/TIvAACIfpZeuYmNjVVOTk7YzcBf3RxcUFBw3ON+9atf6Y477lBdXZ1yc3MjMSoAABgkLL1yI0lut1slJSXKzc1VXl6eqqur1dXVJafTKUkqLi5WamqqPB6PJOmuu+5SRUWFNm3apPT09NC9OaNGjdKoUaMsOw8AABAdLI+boqIiHTlyRBUVFfL5fMrOzlZdXV3oJuPm5mbFxHx9gWnt2rXq6enRj370o7CfU1lZqdtuuy2SowMAgChkedxIksvlksvl6vW5+vr6sMcHDhwY+IEAAMCgNag/LQUAAPDPiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRLI+bNWvWKD09XfHx8crPz1djY+Nx9/3LX/6i//iP/1B6erpsNpuqq6sjNygAABgULI2bLVu2yO12q7KyUrt27VJWVpYKCwvV1tbW6/6ffPKJJk+erJUrVyolJSXC0wIAgMHA0ripqqpSaWmpnE6nMjMzVVNTo5EjR6q2trbX/S+44ALdfffd
uvrqqxUXFxfhaQEAwGBgWdz09PSoqalJDofj62FiYuRwONTQ0NBvr9Pd3a3Ozs6wDQAAmMuyuGlvb5ff71dycnLYenJysnw+X7+9jsfjUUJCQmhLS0vrt58NAACij+U3FA+08vJydXR0hLaWlharRwIAAANomFUvnJiYKLvdrtbW1rD11tbWfr1ZOC4ujvtzAAAYQiy7chMbG6ucnBx5vd7QWiAQkNfrVUFBgVVjAQCAQc6yKzeS5Ha7VVJSotzcXOXl5am6ulpdXV1yOp2SpOLiYqWmpsrj8Uj68ibkN998M/TnDz74QLt379aoUaM0depUy84DAABED0vjpqioSEeOHFFFRYV8Pp+ys7NVV1cXusm4ublZMTFfX1w6dOiQzj///NDje+65R/fcc4/mzJmj+vr6SI8PAACikKVxI0kul0sul6vX5/45WNLT0xUMBiMwFQAAGKyM/7QUAAAYWogbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUYgbAABgFOIGAAAYhbgBAABGIW4AAIBRiBsAAGAU4gYAABiFuAEAAEYhbgAAgFGIGwAAYBTiBgAAGIW4AQAARiFuAACAUaIibtasWaP09HTFx8crPz9fjY2NJ9z/t7/9rTIyMhQfH6/zzjtP27Zti9CkAAAg2lkeN1u2bJHb7VZlZaV27dqlrKwsFRYWqq2trdf9X331VV1zzTVauHCh3njjDc2fP1/z58/X3r17Izw5AACIRpbHTVVVlUpLS+V0OpWZmamamhqNHDlStbW1ve6/atUq/eu//qtuueUWTZ8+XXfccYe++93v6v7774/w5AAAIBpZGjc9PT1qamqSw+EIrcXExMjhcKihoaHXYxoaGsL2l6TCwsLj7g8AAIaWYVa+eHt7u/x+v5KTk8PWk5OT9fbbb/d6jM/n63V/n8/X6/7d3d3q7u4OPe7o6JAkdXZ2HrOvv/vTk5o/WvV2bidy9DP/AE0SWSd73l98+sUATRJZJ3veXV8MzfP+tPuTAZoksk72vD/7/PMBmiSyTva8P/6sa4AmiayTPe9//O/dYNbbeX+1FgwGv/F4S+MmEjwej5YtW3bMelpamgXTREbC6v9j9QjW8CRYPYElEhYPzfNWwtA87/+3xuoJrHHnY0Pzn7futHoAa6xcufK4zx09elQJ3/Dvv6Vxk5iYKLvdrtbW1rD11tZWpaSk9HpMSkrKSe1fXl4ut9sdehwIBPThhx/qjDPOkM1m+5ZncHI6OzuVlpamlpYWjRkzJqKvbSXOm/MeCjhvznsosPK8g8Ggjh49qokTJ37jvpbGTWxsrHJycuT1ejV//nxJX8aH1+uVy+Xq9ZiCggJ5vV7ddNNNobUXXnhBBQUFve4fFxenuLi4sLWxY8f2x/inbMyYMUPqX4avcN5DC+c9tHDeQ4tV5/1NV2y+YvnbUm63WyUlJcrNzVVeXp6qq6vV1dUlp9MpSSouLlZqaqo8Ho8kqaysTHPmzNG9996ryy+/XJs3b9brr7+udevWWXkaAAAgSlgeN0VFRTpy5IgqKirk8/mUnZ2turq60E3Dzc3Nion5+kNds2bN0qZNm/SLX/xCt956q84++2w9+eSTOvfcc606BQAAEEUsjxtJcrlcx30bqr6+/pi1q666SlddddUAT9X/4uLiVFlZeczbZKbjvDnvoYDz5ryHgsFy3rZgXz5TBQAAMEhY/huKAQAA+hNxAwAAjELcAAAAoxA3EfDKK6/oiiuu0MSJE2Wz2fTkk09aPVJEeDweXXDBBRo9erSSkpI0f/587du3z+qxBtzatWs1c+bM0O+BKCgo0LPPPmv1WBG3cuVK2Wy2sN9JZaLbbrtN
NpstbMvIyLB6rIj44IMPdO211+qMM87QiBEjdN555+n111+3eqwBlZ6efsw/b5vNpkWLFlk92oDy+/365S9/qe985zsaMWKEpkyZojvuuKNPX4Vghaj4tJTpurq6lJWVpeuvv17//u//bvU4EbN9+3YtWrRIF1xwgb744gvdeuut+sEPfqA333xTp512mtXjDZgzzzxTK1eu1Nlnn61gMKiHH35YV155pd544w3NmDHD6vEiYufOnXrggQc0c+ZMq0eJiBkzZujFF18MPR42zPz/a/373/+u2bNn6/vf/76effZZjR8/Xu+++65OP/10q0cbUDt37pTf//X38e3du1eXXnrpoPwE78m46667tHbtWj388MOaMWOGXn/9dTmdTiUkJOhnP/uZ1eMdw/x/A6PAvHnzNG/ePKvHiLi6urqwxxs3blRSUpKampp00UUXWTTVwLviiivCHi9fvlxr167Va6+9NiTi5uOPP9aCBQu0fv163Xnn0PhinGHDhh33K2BMdddddyktLU0PPfRQaO073/mOhRNFxvjx48Mer1y5UlOmTNGcOXMsmigyXn31VV155ZW6/PLLJX15Bes3v/mNGhsbLZ6sd7wthYj56hvZx40bZ/EkkeP3+7V582Z1dXUd9ytCTLNo0SJdfvnlcjgcVo8SMe+++64mTpyoyZMna8GCBWpubrZ6pAH39NNPKzc3V1dddZWSkpJ0/vnna/369VaPFVE9PT169NFHdf3110f8uwojbdasWfJ6vXrnnXckSX/605+0Y8eOqP2LO1duEBGBQEA33XSTZs+ePSR+m/SePXtUUFCgzz77TKNGjdITTzyhzMxMq8cacJs3b9auXbu0c+dOq0eJmPz8fG3cuFHTpk3T4cOHtWzZMl144YXau3evRo8ebfV4A+Zvf/ub1q5dK7fbrVtvvVU7d+7Uz372M8XGxqqkpMTq8SLiySef1EcffaQf//jHVo8y4JYsWaLOzk5lZGTIbrfL7/dr+fLlWrBggdWj9Yq4QUQsWrRIe/fu1Y4dO6weJSKmTZum3bt3q6OjQ48//rhKSkq0fft2owOnpaVFZWVleuGFFxQfH2/1OBHzj39znTlzpvLz8zVp0iQ99thjWrhwoYWTDaxAIKDc3FytWLFCknT++edr7969qqmpGTJxs2HDBs2bN69P31I92D322GP69a9/rU2bNmnGjBnavXu3brrpJk2cODEq/3kTNxhwLpdL//u//6tXXnlFZ555ptXjRERsbKymTp0qScrJydHOnTu1atUqPfDAAxZPNnCamprU1tam7373u6E1v9+vV155Rffff7+6u7tlt9stnDAyxo4dq3POOUf79++3epQBNWHChGNiffr06fqf//kfiyaKrIMHD+rFF1/U1q1brR4lIm655RYtWbJEV199tSTpvPPO08GDB+XxeIgbDC3BYFA33nijnnjiCdXX1w+Jmw2PJxAIqLu72+oxBtQll1yiPXv2hK05nU5lZGRo8eLFQyJspC9vqP7rX/+q6667zupRBtTs2bOP+dUO77zzjiZNmmTRRJH10EMPKSkpKXSDrek++eSTsC+xliS73a5AIGDRRCdG3ETAxx9/HPa3uPfee0+7d+/WuHHjdNZZZ1k42cBatGiRNm3apKeeekqjR4+Wz+eTJCUkJGjEiBEWTzdwysvLNW/ePJ111lk6evSoNm3apPr6ej333HNWjzagRo8efcz9VKeddprOOOMMo++zuvnmm3XFFVdo0qRJOnTokCorK2W323XNNddYPdqA+vnPf65Zs2ZpxYoV+s///E81NjZq3bp1WrdundWjDbhAIKCHHnpIJSUlQ+Jj/9KXnwJdvny5zjrrLM2YMUNvvPGGqqqqdP3111s9Wu+CGHAvv/xyUNIxW0lJidWjDajezllS8KGHHrJ6tAF1/fXXBydNmhSMjY0Njh8/PnjJJZcEn3/+eavHssScOXOCZWVlVo8xoIqKioITJkwIxsbGBlNT
U4NFRUXB/fv3Wz1WRPzud78LnnvuucG4uLhgRkZGcN26dVaPFBHPPfdcUFJw3759Vo8SMZ2dncGysrLgWWedFYyPjw9Onjw5uHTp0mB3d7fVo/WKbwUHAABG4ffcAAAAoxA3AADAKMQNAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAY0tLT01VdXW31GAD6EXEDIGrMnTtXN910k9VjABjkiBsAAGAU4gZAVPjxj3+s7du3a9WqVbLZbLLZbDpw4IC2b9+uvLw8xcXFacKECVqyZIm++OKL0HFz586Vy+WSy+VSQkKCEhMT9ctf/lKn+rV5Dz74oMaOHSuv19tfpwYgwogbAFFh1apVKigoUGlpqQ4fPqzDhw9r+PDhuuyyy3TBBRfoT3/6k9auXasNGzbozjvvDDv24Ycf1rBhw9TY2KhVq1apqqpKDz744EnP8Ktf/UpLlizR888/r0suuaS/Tg1AhA2zegAAkKSEhATFxsZq5MiRSklJkSQtXbpUaWlpuv/++2Wz2ZSRkaFDhw5p8eLFqqioUEzMl38/S0tL03333SebzaZp06Zpz549uu+++1RaWtrn11+8eLEeeeQRbd++XTNmzBiQcwQQGVy5ARC13nrrLRUUFMhms4XWZs+erY8//ljvv/9+aO173/te2D4FBQV699135ff7+/Q69957r9avX68dO3YQNoABiBsAQ96FF14ov9+vxx57zOpRAPQD4gZA1IiNjQ272jJ9+nQ1NDSE3Rz8hz/8QaNHj9aZZ54ZWvvjH/8Y9nNee+01nX322bLb7X163by8PD377LNasWKF7rnnnm95FgCsRtwAiBrp6en64x//qAMHDqi9vV033HCDWlpadOONN+rtt9/WU089pcrKSrnd7tD9NpLU3Nwst9utffv26Te/+Y1Wr16tsrKyk3rtWbNmadu2bVq2bBm/1A8Y5LihGEDUuPnmm1VSUqLMzEx9+umneu+997Rt2zbdcsstysrK0rhx47Rw4UL94he/CDuuuLhYn376qfLy8mS321VWVqb/+q//OunX/5d/+Rc988wzuuyyy2S323XjjTf216kBiCBb8FR/GQQARIG5c+cqOzubqy0AQnhbCgAAGIW4AWCs3//+9xo1atRxNwBm4m0pAMb69NNP9cEHHxz3+alTp0ZwGgCRQtwAAACj8LYUAAAwCnEDAACMQtwAAACjEDcAAMAoxA0AADAKcQMAAIxC3AAAAKMQNwAAwCj/H7Z4QO3/Cwf1AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "sns.barplot(x='top_k', y='hit', data=hit_stat_df, errorbar=None)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7925564a-7d30-4914-baaf-4a00abb7686d",
   "metadata": {
    "papermill": {
     "duration": 0.109216,
     "end_time": "2024-11-23T14:35:26.464009",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.354793",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 生成答案"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "27132c3b-0051-4df6-bf57-fd804acb8d17",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:28.326333Z",
     "iopub.status.busy": "2024-12-04T15:05:28.326029Z",
     "iopub.status.idle": "2024-12-04T15:05:28.399783Z",
     "shell.execute_reply": "2024-12-04T15:05:28.399275Z",
     "shell.execute_reply.started": "2024-12-04T15:05:28.326319Z"
    },
    "papermill": {
     "duration": 0.199165,
     "end_time": "2024-11-23T14:35:27.323500",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.124335",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_1294088/2053630348.py:3: LangChainDeprecationWarning: The class `Ollama` was deprecated in LangChain 0.3.1 and will be removed in 1.0.0. An updated version of the class exists in the :class:`~langchain-ollama package and should be used instead. To use it run `pip install -U :class:`~langchain-ollama` and import as `from :class:`~langchain_ollama import OllamaLLM``.\n",
      "  ollama_llm = Ollama(\n"
     ]
    }
   ],
   "source": [
    "from langchain.llms import Ollama\n",
    "\n",
    "ollama_llm = Ollama(\n",
    "    model='qwen2:7b-instruct-32k',\n",
    "    base_url='http://localhost:11434',\n",
    "    top_k=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "50404beb-3be0-4aaa-b124-8c7a52b84531",
   "metadata": {
    "editable": true,
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:28.400430Z",
     "iopub.status.busy": "2024-12-04T15:05:28.400295Z",
     "iopub.status.idle": "2024-12-04T15:05:28.404178Z",
     "shell.execute_reply": "2024-12-04T15:05:28.403784Z",
     "shell.execute_reply.started": "2024-12-04T15:05:28.400417Z"
    },
    "papermill": {
     "duration": 0.159318,
     "end_time": "2024-11-23T14:35:26.768506",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.609188",
     "status": "completed"
    },
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "def rag(vector_db, llm, query, n_chunks=4):\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个金融分析师，擅长根据所获取的信息片段，对问题进行分析和推理。\n",
    "你的任务是根据所获取的信息片段（<<<<context>>><<<</context>>>之间的内容）回答问题。\n",
    "回答保持简洁，不必重复问题，不要添加描述性解释和与答案无关的任何内容。\n",
    "已知信息：\n",
    "<<<<context>>>\n",
    "{{knowledge}}\n",
    "<<<</context>>>\n",
    "\n",
    "问题：{{query}}\n",
    "请回答：\n",
    "\"\"\".strip()\n",
    "    chunks = vector_db.similarity_search(query, k=n_chunks)\n",
    "    prompt = prompt_tmpl.replace('{{knowledge}}', '\\n\\n'.join([doc.page_content for doc in chunks])).replace('{{query}}', query)\n",
    "    retry_count = 3\n",
    "\n",
    "    resp = ''\n",
    "    while retry_count > 0:\n",
    "        try:\n",
    "            resp = llm.invoke(prompt)\n",
    "            break\n",
    "        except Exception as e:\n",
    "            retry_count -= 1\n",
    "            sleeping_seconds = 2 ** (4 - retry_count)\n",
    "            print(f\"query={query}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "            \n",
    "            time.sleep(sleeping_seconds)\n",
    "    \n",
    "    return resp, chunks"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95e5a804-2dc6-411c-ba71-6ccf765b2b73",
   "metadata": {
    "papermill": {
     "duration": 0.135973,
     "end_time": "2024-11-23T14:35:27.001401",
     "exception": false,
     "start_time": "2024-11-23T14:35:26.865428",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "166392d8-f801-4372-b8ad-3e79aef0b350",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:28.404750Z",
     "iopub.status.busy": "2024-12-04T15:05:28.404627Z",
     "iopub.status.idle": "2024-12-04T15:05:28.412401Z",
     "shell.execute_reply": "2024-12-04T15:05:28.411955Z",
     "shell.execute_reply.started": "2024-12-04T15:05:28.404738Z"
    },
    "papermill": {
     "duration": 0.141864,
     "end_time": "2024-11-23T14:35:27.564409",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.422545",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "prediction_df = qa_df[qa_df['dataset'] == 'test'][['uuid', 'question', 'qa_type', 'answer']].rename(columns={'answer': 'ref_answer'})\n",
    "\n",
    "def predict(vector_db, llm, prediction_df, n_chunks):\n",
    "    prediction_df = prediction_df.copy()\n",
    "    answer_dict = {}\n",
    "\n",
    "    for idx, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        uuid = row['uuid']\n",
    "        question = row['question']\n",
    "        answer, chunks = rag(vector_db, llm, question, n_chunks=n_chunks)\n",
    "        assert len(chunks) <= n_chunks\n",
    "        answer_dict[question] = {\n",
    "            'uuid': uuid,\n",
    "            'ref_answer': row['ref_answer'],\n",
    "            'gen_answer': answer,\n",
    "            'chunks': chunks\n",
    "        }\n",
    "\n",
    "    prediction_df.loc[:, 'gen_answer'] = prediction_df['question'].apply(lambda q: answer_dict[q]['gen_answer'])\n",
    "    prediction_df.loc[:, 'chunks'] = prediction_df['question'].apply(lambda q: answer_dict[q]['chunks'])\n",
    "\n",
    "    return prediction_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "ca46d5f1-e698-457d-abb6-92d83cd59c66",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:28.412993Z",
     "iopub.status.busy": "2024-12-04T15:05:28.412866Z",
     "iopub.status.idle": "2024-12-04T15:05:28.419123Z",
     "shell.execute_reply": "2024-12-04T15:05:28.418669Z",
     "shell.execute_reply.started": "2024-12-04T15:05:28.412981Z"
    },
    "papermill": {
     "duration": 514.92352,
     "end_time": "2024-11-23T14:44:02.805529",
     "exception": false,
     "start_time": "2024-11-23T14:35:27.882009",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "save_path = os.path.join(expr_dir, 'predictions.pkl')\n",
    "\n",
    "if os.path.exists(save_path):\n",
    "    pred_dfs = pickle.load(open(save_path, 'rb'))\n",
    "else:\n",
    "    pred_dfs = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "ec9a4190-4695-43de-8bce-66abb6f6ec7c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:05:28.419763Z",
     "iopub.status.busy": "2024-12-04T15:05:28.419632Z",
     "iopub.status.idle": "2024-12-04T15:53:30.005519Z",
     "shell.execute_reply": "2024-12-04T15:53:30.004789Z",
     "shell.execute_reply.started": "2024-12-04T15:05:28.419750Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "141c554db64e4e779993298a903d8c44",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=3\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7ae74ee8f81a4bec8d239b7cdd991f53",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=4\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2cfae0c469b14cebab39af6e720c0242",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=5\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0a8b0aee1d0c4787be1d5e6dcedf0139",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=6\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d78513f8e0a44659b28b0db23208f3d3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=7\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "13f18c11cfca44b4a50041fc704b03c7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=8\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "12745dfef2974bf3ba50a5bfa8a0cff5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=9\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e393a151970240759a29d3e88130e43f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=10\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "36e12ca92635499c876fb2d8ee206937",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "n_chunks_arr = range(3, 11)\n",
    "\n",
    "for n_chunks in tqdm(n_chunks_arr):\n",
    "    if n_chunks in pred_dfs:\n",
    "        continue\n",
    "\n",
    "    print(f\"n_chunks={n_chunks}\")\n",
    "    pred_dfs[n_chunks] = predict(md_vector_db, ollama_llm, prediction_df, n_chunks=n_chunks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "92de70bc-9cdd-47a1-917e-cdb15138a28f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:53:30.006290Z",
     "iopub.status.busy": "2024-12-04T15:53:30.006094Z",
     "iopub.status.idle": "2024-12-04T15:53:30.010416Z",
     "shell.execute_reply": "2024-12-04T15:53:30.009715Z",
     "shell.execute_reply.started": "2024-12-04T15:53:30.006271Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys([3, 4, 5, 6, 7, 8, 9, 10])"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pred_dfs.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "f7026bac-9927-4a33-85c0-bc1b35f3a603",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:53:30.011366Z",
     "iopub.status.busy": "2024-12-04T15:53:30.011156Z",
     "iopub.status.idle": "2024-12-04T15:53:30.018857Z",
     "shell.execute_reply": "2024-12-04T15:53:30.018207Z",
     "shell.execute_reply.started": "2024-12-04T15:53:30.011346Z"
    }
   },
   "outputs": [],
   "source": [
    "save_path = os.path.join(expr_dir, 'pred_dfs.pkl')\n",
    "\n",
    "if not os.path.exists(save_path):\n",
    "    print(f'saving to {save_path}')\n",
    "    pickle.dump(pred_dfs, open(save_path, 'wb'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d79e974-089f-4c08-ba5e-804f6542e06a",
   "metadata": {
    "papermill": {
     "duration": 0.14423,
     "end_time": "2024-11-23T14:44:03.513124",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.368894",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "217568fe-c0e4-49eb-9a7c-9fdfbc033d8a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:53:30.020143Z",
     "iopub.status.busy": "2024-12-04T15:53:30.019713Z",
     "iopub.status.idle": "2024-12-04T15:53:30.239593Z",
     "shell.execute_reply": "2024-12-04T15:53:30.239072Z",
     "shell.execute_reply.started": "2024-12-04T15:53:30.020122Z"
    },
    "papermill": {
     "duration": 0.369729,
     "end_time": "2024-11-23T14:44:04.017198",
     "exception": false,
     "start_time": "2024-11-23T14:44:03.647469",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "import time\n",
    "\n",
    "judge_llm = ChatOpenAI(\n",
    "    api_key=os.environ['LLM_API_KEY'],\n",
    "    base_url=os.environ['LLM_BASE_URL'],\n",
    "    model_name='qwen2-72b-instruct',\n",
    "    temperature=0\n",
    ")\n",
    "\n",
    "def evaluate(prediction_df):\n",
    "    \"\"\"\n",
    "    对预测结果进行打分\n",
    "    :param prediction_df: 预测结果，需要包含问题，参考答案，生成的答案，列名分别为question, ref_answer, gen_answer\n",
    "    :return 打分模型原始返回结果\n",
    "    \"\"\"\n",
    "    prompt_tmpl = \"\"\"\n",
    "你是一个经济学博士，现在我有一系列问题，有一个助手已经对这些问题进行了回答，你需要参照参考答案，评价这个助手的回答是否正确，仅回复“是”或“否”即可，不要带其他描述性内容或无关信息。\n",
    "问题：\n",
    "<question>\n",
    "{{question}}\n",
    "</question>\n",
    "\n",
    "参考答案：\n",
    "<ref_answer>\n",
    "{{ref_answer}}\n",
    "</ref_answer>\n",
    "\n",
    "助手回答：\n",
    "<gen_answer>\n",
    "{{gen_answer}}\n",
    "</gen_answer>\n",
    "请评价：\n",
    "    \"\"\"\n",
    "    results = []\n",
    "\n",
    "    for _, row in tqdm(prediction_df.iterrows(), total=len(prediction_df)):\n",
    "        question = row['question']\n",
    "        ref_answer = row['ref_answer']\n",
    "        gen_answer = row['gen_answer']\n",
    "\n",
    "        prompt = prompt_tmpl.replace('{{question}}', question).replace('{{ref_answer}}', str(ref_answer)).replace('{{gen_answer}}', gen_answer).strip()\n",
    "        \n",
    "        retry_count = 3\n",
    "        result = ''\n",
    "        \n",
    "        while retry_count > 0:\n",
    "            try:\n",
    "                result = judge_llm.invoke(prompt).content\n",
    "                break\n",
    "            except Exception as e:\n",
    "                retry_count -= 1\n",
    "                sleeping_seconds = 2 ** (4 - retry_count)\n",
    "                print(f\"query={question}, error={e}, sleeping={sleeping_seconds}, remaining retry count={retry_count}\")\n",
    "                \n",
    "                time.sleep(sleeping_seconds)\n",
    "        \n",
    "        results.append(result)\n",
    "\n",
    "        time.sleep(1)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "71db81af-b8f9-47ba-958b-761896516605",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T15:53:30.240136Z",
     "iopub.status.busy": "2024-12-04T15:53:30.240005Z",
     "iopub.status.idle": "2024-12-04T16:14:05.896379Z",
     "shell.execute_reply": "2024-12-04T16:14:05.893959Z",
     "shell.execute_reply.started": "2024-12-04T15:53:30.240124Z"
    },
    "papermill": {
     "duration": 150.566109,
     "end_time": "2024-11-23T14:46:34.714324",
     "exception": false,
     "start_time": "2024-11-23T14:44:04.148215",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cf34036da1224f73a26e30256f5d3208",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/8 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a5ded32490f04349854ce40ac6d3b33e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=3 raw_score unique: ['是' '否'], accuracy=0.71\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9b3f3940379541a3b586cbf955e35156",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=4 raw_score unique: ['是' '否'], accuracy=0.74\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "279db3a8c6ca433f8a3af21726813750",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=5 raw_score unique: ['是' '否'\n",
      " '否\\n\\n注：由于问题中没有给出具体的年份，而参考答案和助手回答中给出了不同的时间点（一个没有年份，一个指定了2024年），因此无法判断助手的回答是否完全准确。但是，按照题目要求，我仅能回答“是”或“否”，所以我选择了“否”。如果需要更详细的解释，请忽略此注释。'], accuracy=0.74\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d156680e1f904394ae692723096eac98",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=6 raw_score unique: ['是' '否'], accuracy=0.76\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "834ac86202314941be11c3edaf6f50b0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=7 raw_score unique: ['是' '否'], accuracy=0.77\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e6e491c552214a48b4ca5da91507dd1f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=8 raw_score unique: ['是' '否'], accuracy=0.78\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "64874e1d0a6e431ba79664a13ce179cd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=9 raw_score unique: ['是' '否'], accuracy=0.77\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1247cf2d834344d5b3fe8aa124abc9c6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/100 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "n_chunks=10 raw_score unique: ['是' '否'\n",
      " '否\\n\\n（注：虽然纽约联储全球供应链压力指数下降可能表明供应链状况改善，但这并不一定意味着全球供应链已经完全恢复，因此，“持续恢复”和“降至有记录以来的最低值”这两个表述并不等价。） \\n\\n但是，根据题目要求，我只应该回答“是”或“否”，所以我的回答是“否”。'], accuracy=0.78\n"
     ]
    }
   ],
   "source": [
    "metrics = []\n",
    "\n",
    "for n_chunks in tqdm(pred_dfs):\n",
    "    pred_df = pred_dfs[n_chunks]\n",
    "    pred_df['raw_score'] = evaluate(pred_df)\n",
    "    pred_df['score'] = (pred_df['raw_score'] == '是').astype(int)\n",
    "    print(f\"n_chunks={n_chunks} raw_score unique: {pred_df['raw_score'].unique()}, accuracy={pred_df['score'].mean()}\")\n",
    "\n",
    "    metrics.append({\n",
    "        'n_chunks': n_chunks,\n",
    "        'accuracy': pred_df['score'].mean()\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "7da1b98e-99aa-4e11-9297-91eac1c62493",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T16:14:05.899522Z",
     "iopub.status.busy": "2024-12-04T16:14:05.898771Z",
     "iopub.status.idle": "2024-12-04T16:14:05.910435Z",
     "shell.execute_reply": "2024-12-04T16:14:05.908098Z",
     "shell.execute_reply.started": "2024-12-04T16:14:05.899455Z"
    },
    "papermill": {
     "duration": 0.138037,
     "end_time": "2024-11-23T14:46:35.040595",
     "exception": false,
     "start_time": "2024-11-23T14:46:34.902558",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "metrics_df = pd.DataFrame(metrics)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "2c99c078-d294-40b8-b57b-31cfd7349c3e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T16:14:05.914966Z",
     "iopub.status.busy": "2024-12-04T16:14:05.913332Z",
     "iopub.status.idle": "2024-12-04T16:14:05.931237Z",
     "shell.execute_reply": "2024-12-04T16:14:05.929960Z",
     "shell.execute_reply.started": "2024-12-04T16:14:05.914864Z"
    },
    "papermill": {
     "duration": 0.107466,
     "end_time": "2024-11-23T14:46:35.243603",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.136137",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>n_chunks</th>\n",
       "      <th>accuracy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>0.71</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4</td>\n",
       "      <td>0.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "      <td>0.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6</td>\n",
       "      <td>0.76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7</td>\n",
       "      <td>0.77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8</td>\n",
       "      <td>0.78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>9</td>\n",
       "      <td>0.77</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>10</td>\n",
       "      <td>0.78</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   n_chunks  accuracy\n",
       "0         3      0.71\n",
       "1         4      0.74\n",
       "2         5      0.74\n",
       "3         6      0.76\n",
       "4         7      0.77\n",
       "5         8      0.78\n",
       "6         9      0.77\n",
       "7        10      0.78"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "metrics_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "423897f2-786e-415b-a613-55a4359faf76",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T16:14:05.932842Z",
     "iopub.status.busy": "2024-12-04T16:14:05.932484Z",
     "iopub.status.idle": "2024-12-04T16:14:06.052951Z",
     "shell.execute_reply": "2024-12-04T16:14:06.052484Z",
     "shell.execute_reply.started": "2024-12-04T16:14:05.932807Z"
    },
    "papermill": {
     "duration": 0.094328,
     "end_time": "2024-11-23T14:46:35.431162",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.336834",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Axes: xlabel='n_chunks', ylabel='accuracy'>"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGxCAYAAACeKZf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAvNklEQVR4nO3de1zUdaL/8feAcvOCF3RQQshLXvKCwsJimp2aonTd3DouuW6wWJyzKetlTq1RKl3FzIhKN7KVbY9WUm61laXZFLYVRwyjNC9lN0gF9ZSguIHNzO+Pfk2HBQ3GYb7w9fV8PL6PR/OZz+c77++61bvvfGbG4na73QIAADCJAKMDAAAA+BLlBgAAmArlBgAAmArlBgAAmArlBgAAmArlBgAAmArlBgAAmArlBgAAmEonowP4m8vl0sGDB9WtWzdZLBaj4wAAgBZwu906fvy4+vfvr4CAM9+bOefKzcGDBxUdHW10DAAA4IXKykqdd955Z5xzzpWbbt26Sfr+f5zu3bsbnAYAALREbW2toqOjPf8eP5Nzrtz88FZU9+7dKTcAAHQwLdlSwoZiAABgKoaXm1WrVik2NlYhISFKSkpSaWnpGefn5+dr6NChCg0NVXR0tBYsWKBvv/3WT2kBAEB7Z2i5KSoqkt1uV05Ojnbs2KExY8YoJSVFhw8fbnb+U089pVtvvVU5OTnas2eP1qxZo6KiIt12221+Tg4AANorQ8tNXl6eMjMzlZGRoREjRqigoEBhYWEqLCxsdv67776riy66SL/5zW8UGxurK664QjNmzPjJuz0AAODcYVi5aWhoUFlZmWw2249hAgJks9lUUlLS7Jrx48errKzMU2Y+++wzvfLKK5o8ebJfMgMAgPbPsE9LHT16VE6nU1artdG41WrV3r17m13zm9/8RkePHtWECRPkdrv13Xff6fe///0Z35aqr69XfX2953Ftba1vLgAAALRLhm8obo3i4mItXbpUf/rTn7Rjxw4999xz2rhxo+6+++7TrsnNzVV4eLjn4Av8AAAwN4vb7XYb8cINDQ0KCwvThg0bNG3aNM94enq6jh07pr///e9N1kycOFE///nPdf/993vG1q1bp//4j//QiRMnmv065ubu3ERHR6umpobvuQEAoIOora1VeHh4i/79bdidm6CgIMXHx8vhcHjGXC6XHA6HkpOTm11z8uTJJgUmMDBQ0ve/OdGc4OBgzxf28cV9AACYn6HfUGy325Wenq6EhAQlJiYqPz9fdXV1ysjIkCSlpaUpKipKubm5kqSpU6cqLy9PY8eOVVJSkvbv36/Fixdr6tSpnpIDAADObYaWm9TUVB05ckRLlixRVVWV4uLitGnTJs8m44qKikZ3ahYtWiSLxaJFixbpwIED6tOnj6ZOnap7773XqEsAAADtjGF7bozSmvfsAABA+9Ah9twAAAC0BcoNAAAwFcoNAAAwFUM3FAMA2ta9v/13oyP4xO3rNhgdoUO44447jI7gE2d7Hdy5AQAApkK5AQAApkK5AQAApsKeGwDnhJX/9ZLREXwi64GpRkfoEPbc+4bREXxi+O2XGh2hQ+LODQAAMBXKDQAAMBXKDQAAMBXKDQAAMBU2FAPnmK0XTzI6gk9Memur0REAtFPcuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKbCl/hBFXeNMjqCTwxYsrNV8y965KI2SuJf7/zhHaMjAEC7wp0bAABgKpQbAABgKpQbAABgKpQbAABgKpQbAABgKpQbAABgKpQbAABgKpQbAABgKpQbAABgKpQbAABgKpQbAABgKpQbAABgKu3ihzNXrVql+++/X1VVVRozZoweeeQRJSYmNjv3kksu0datW5uMT548WRs3bjyrHPG3/PdZrW8vyu5PMzoCAACGMfzOTVFRkex2u3JycrRjxw6NGTNGKSkpOnz4cLPzn3vuOR06
dMhz7Nq1S4GBgZo+fbqfkwMAgPbI8HKTl5enzMxMZWRkaMSIESooKFBYWJgKCwubnd+rVy9FRkZ6ji1btigsLIxyAwAAJBlcbhoaGlRWViabzeYZCwgIkM1mU0lJSYvOsWbNGl133XXq0qVLW8UEAAAdiKF7bo4ePSqn0ymr1dpo3Gq1au/evT+5vrS0VLt27dKaNWtOO6e+vl719fWex7W1td4HBgAA7Z7hb0udjTVr1mjUqFGn3XwsSbm5uQoPD/cc0dHRfkwIAAD8zdByExERocDAQFVXVzcar66uVmRk5BnX1tXVaf369brhhhvOOC87O1s1NTWeo7Ky8qxzAwCA9svQchMUFKT4+Hg5HA7PmMvlksPhUHJy8hnXPvvss6qvr9dvf/vbM84LDg5W9+7dGx0AAMC8DP+eG7vdrvT0dCUkJCgxMVH5+fmqq6tTRkaGJCktLU1RUVHKzc1ttG7NmjWaNm2aevfubURsAADQThleblJTU3XkyBEtWbJEVVVViouL06ZNmzybjCsqKhQQ0PgG0759+/T222/rtddeMyIyAABoxwwvN5KUlZWlrKysZp8rLi5uMjZ06FC53e42TgUAADqiDv1pKQAAgH9FuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZCuQEAAKZieLlZtWqVYmNjFRISoqSkJJWWlp5x/rFjxzRnzhz169dPwcHBuuCCC/TKK6/4KS0AAGjvOhn54kVFRbLb7SooKFBSUpLy8/OVkpKiffv2qW/fvk3mNzQ06PLLL1ffvn21YcMGRUVF6csvv1SPHj38Hx4AALRLhpabvLw8ZWZmKiMjQ5JUUFCgjRs3qrCwULfeemuT+YWFhfr666/17rvvqnPnzpKk2NhYf0YGAADtnGFvSzU0NKisrEw2m+3HMAEBstlsKikpaXbNiy++qOTkZM2ZM0dWq1UjR47U0qVL5XQ6/RUbAAC0c4bduTl69KicTqesVmujcavVqr179za75rPPPtMbb7yhmTNn6pVXXtH+/fs1e/ZsnTp1Sjk5Oc2uqa+vV319vedxbW2t7y4CAAC0O4ZvKG4Nl8ulvn37avXq1YqPj1dqaqpuv/12FRQUnHZNbm6uwsPDPUd0dLQfEwMAAH8zrNxEREQoMDBQ1dXVjcarq6sVGRnZ7Jp+/frpggsuUGBgoGds+PDhqqqqUkNDQ7NrsrOzVVNT4zkqKyt9dxEAAKDdMazcBAUFKT4+Xg6HwzPmcrnkcDiUnJzc7JqLLrpI+/fvl8vl8ox9/PHH6tevn4KCgppdExwcrO7duzc6AACAeRn6tpTdbtfjjz+uv/71r9qzZ49uuukm1dXVeT49lZaWpuzsbM/8m266SV9//bXmzZunjz/+WBs3btTSpUs1Z84coy4BAAC0M4Z+FDw1NVVHjhzRkiVLVFVVpbi4OG3atMmzybiiokIBAT/2r+joaG3evFkLFizQ6NGjFRUVpXnz5mnhwoVGXQIAAGhnDC03kpSVlaWsrKxmnysuLm4ylpycrP/5n/9p41QAAKCj6lCflgIAAPgplBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAq
lBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAqlBsAAGAq7aLcrFq1SrGxsQoJCVFSUpJKS0tPO/eJJ56QxWJpdISEhPgxLQAAaM8MLzdFRUWy2+3KycnRjh07NGbMGKWkpOjw4cOnXdO9e3cdOnTIc3z55Zd+TAwAANozw8tNXl6eMjMzlZGRoREjRqigoEBhYWEqLCw87RqLxaLIyEjPYbVa/ZgYAAC0Z4aWm4aGBpWVlclms3nGAgICZLPZVFJSctp1J06cUExMjKKjo3X11Vfro48+8kdcAADQARhabo4ePSqn09nkzovValVVVVWza4YOHarCwkL9/e9/17p16+RyuTR+/Hh99dVXzc6vr69XbW1towMAAJiX4W9LtVZycrLS0tIUFxenSZMm6bnnnlOfPn302GOPNTs/NzdX4eHhniM6OtrPiQEAgD8ZWm4iIiIUGBio6urqRuPV1dWKjIxs0Tk6d+6ssWPHav/+/c0+n52drZqaGs9RWVl51rkBAED7ZWi5CQoKUnx8vBwOh2fM5XLJ4XAoOTm5RedwOp3auXOn+vXr1+zzwcHB6t69e6MDAACYVyejA9jtdqWnpyshIUGJiYnKz89XXV2dMjIyJElpaWmKiopSbm6uJOmuu+7Sz3/+cw0ePFjHjh3T/fffry+//FI33nijkZcBAADaCcPLTWpqqo4cOaIlS5aoqqpKcXFx2rRpk2eTcUVFhQICfrzB9M033ygzM1NVVVXq2bOn4uPj9e6772rEiBFGXQIAAGhHDC83kpSVlaWsrKxmnysuLm70+MEHH9SDDz7oh1QAAKAj6nCflgIAADgTyg0AADAVyg0AADAVyg0AADAVyg0AADAVr8rNm2++6escAAAAPuFVubnyyis1aNAg3XPPPfycAQAAaFe8KjcHDhxQVlaWNmzYoIEDByolJUXPPPOMGhoafJ0PAACgVbwqNxEREVqwYIHKy8u1bds2XXDBBZo9e7b69++vuXPn6oMPPvB1TgAAgBY56w3F48aNU3Z2trKysnTixAkVFhYqPj5eEydO1EcffeSLjAAAAC3mdbk5deqUNmzYoMmTJysmJkabN2/WypUrVV1drf379ysmJkbTp0/3ZVYAAICf5NVvS/3hD3/Q008/Lbfbreuvv17Lly/XyJEjPc936dJFK1asUP/+/X0WFAAAoCW8Kje7d+/WI488omuuuUbBwcHNzomIiOAj4wAAwO+8KjcOh+OnT9ypkyZNmuTN6QEAALzm1Z6b3NxcFRYWNhkvLCzUfffdd9ahAAAAvOVVuXnsscc0bNiwJuMXXnihCgoKzjoUAACAt7wqN1VVVerXr1+T8T59+ujQoUNnHQoAAMBbXpWb6OhovfPOO03G33nnHT4hBQAADOXVhuLMzEzNnz9fp06d0qWXXirp+03Gf/zjH/Vf//VfPg0IAADQGl6Vm1tuuUX/+7//q9mzZ3t+TyokJEQLFy5Udna2TwMCAAC0hlflxmKx6L777tPixYu1Z88ehYaGasiQIaf9zhsAAAB/8arc/KBr16762c9+5qssAAAAZ83rcvPee+/pmWeeUUVFheetqR8899xzZx0MAADAG159Wmr9+vUaP3689uzZo+eff16nTp3SRx99pDfeeEPh4eG+zggAANBiXpWbpUuX6sEHH9RLL72koKAgPfTQQ9q7d69+/etfa8CAAb7OCAAA0GJelZtPP/1UU6ZMkSQFBQWprq5OFotFCxYs0OrVq30aEAAAoDW8Kjc9e/bU8ePHJUlRUVHatWuXJOnYsWM6efKk79IBAAC0klcbii+++GJt2bJFo0aN0vTp0zVv3jy98cYb2rJliy677DJfZwQAAGgxr8rNypUr9e2330qSbr/9dnXu3Fnvvvuurr32Wi1atMinAQEAAFqj1eXmu+++08svv6yUlBRJUkBAgG699VafBwMAAPBGq/fcdOrUSb///e89d24AAADaE682FCcmJqq8vNzHUQAAAM6eV3tu
Zs+eLbvdrsrKSsXHx6tLly6Nnh89erRPwgEAALSWV+XmuuuukyTNnTvXM2axWOR2u2WxWOR0On2TDgAAoJW8Kjeff/65r3MAAAD4hFd7bmJiYs54tNaqVasUGxurkJAQJSUlqbS0tEXr1q9fL4vFomnTprX6NQEAgDl5defmv//7v8/4fFpaWovPVVRUJLvdroKCAiUlJSk/P18pKSnat2+f+vbte9p1X3zxhW6++WZNnDixxa8FAADMz6tyM2/evEaPT506pZMnTyooKEhhYWGtKjd5eXnKzMxURkaGJKmgoEAbN25UYWHhab8/x+l0aubMmbrzzjv1j3/8Q8eOHfPmMgAAgAl59bbUN9980+g4ceKE9u3bpwkTJujpp59u8XkaGhpUVlYmm832Y6CAANlsNpWUlJx23V133aW+ffvqhhtu+MnXqK+vV21tbaMDAACYl1flpjlDhgzRsmXLmtzVOZOjR4/K6XTKarU2Grdaraqqqmp2zdtvv601a9bo8ccfb9Fr5ObmKjw83HNER0e3OB8AAOh4fFZupO+/vfjgwYO+PGUjx48f1/XXX6/HH39cERERLVqTnZ2tmpoaz1FZWdlm+QAAgPG82nPz4osvNnrsdrt16NAhrVy5UhdddFGLzxMREaHAwEBVV1c3Gq+urlZkZGST+Z9++qm++OILTZ061TPmcrkkfV+s9u3bp0GDBjVaExwcrODg4BZnAgAAHZtX5eZfP3ptsVjUp08fXXrppXrggQdafJ6goCDFx8fL4XB4zulyueRwOJSVldVk/rBhw7Rz585GY4sWLdLx48f10EMP8ZYTAADwrtz8cLfEF+x2u9LT05WQkKDExETl5+errq7O8+mptLQ0RUVFKTc3VyEhIRo5cmSj9T169JCkJuMAAODc5FW58aXU1FQdOXJES5YsUVVVleLi4rRp0ybPJuOKigoFBPh0axAAADAxr8rNtddeq8TERC1cuLDR+PLly7V9+3Y9++yzrTpfVlZWs29DSVJxcfEZ1z7xxBOtei0AAGBuXt0SeeuttzR58uQm41dddZXeeuutsw4FAADgLa/KzYkTJxQUFNRkvHPnznxJHgAAMJRX5WbUqFEqKipqMr5+/XqNGDHirEMBAAB4y6s9N4sXL9Y111yjTz/9VJdeeqkkyeFw6Omnn271fhsAAABf8qrcTJ06VS+88IKWLl2qDRs2KDQ0VKNHj9brr7+uSZMm+TojAABAi3n9UfApU6ZoypQpvswCAABw1rzac7N9+3Zt27atyfi2bdv03nvvnXUoAAAAb3lVbubMmdPsD1AeOHBAc+bMOetQAAAA3vKq3OzevVvjxo1rMj527Fjt3r37rEMBAAB4y6tyExwc3OSXvCXp0KFD6tTJ8F90AAAA5zCvys0VV1yh7Oxs1dTUeMaOHTum2267TZdffrnPwgEAALSWV7dZVqxYoYsvvlgxMTEaO3asJKm8vFxWq1Vr1671aUAAAIDW8KrcREVF6cMPP9STTz6pDz74QKGhocrIyNCMGTPUuXNnX2cEAABoMa83yHTp0kUTJkzQgAED1NDQIEl69dVXJUm//OUvfZMOAACglbwqN5999pl+9atfaefOnbJYLHK73bJYLJ7nnU6nzwICAAC0hlcbiufNm6fzzz9fhw8fVlhYmHbt2qWtW7cqISFBxcXFPo4IAADQcl7duSkpKdEbb7yhiIgIBQQEKDAwUBMmTFBubq7mzp2r999/39c5AQAAWsSrOzdOp1PdunWTJEVEROjgwYOSpJiYGO3bt8936QAAAFrJqzs3I0eO1AcffKDzzz9fSUlJWr58uYKCgrR69WoNHDjQ1xkBAABazKtys2jRItXV1UmS7rrrLv3iF7/QxIkT1bt3bxUVFfk0IAAAQGt4VW5SUlI8fz148GDt3btXX3/9tXr27NnoU1MAAAD+5rMfgurVq5evTgUAAOA1rzYUAwAAtFeUGwAAYCqUGwAAYCqUGwAAYCqUGwAA
YCqUGwAAYCqUGwAAYCqUGwAAYCqUGwAAYCqUGwAAYCqUGwAAYCqUGwAAYCrtotysWrVKsbGxCgkJUVJSkkpLS08797nnnlNCQoJ69OihLl26KC4uTmvXrvVjWgAA0J4ZXm6Kiopkt9uVk5OjHTt2aMyYMUpJSdHhw4ebnd+rVy/dfvvtKikp0YcffqiMjAxlZGRo8+bNfk4OAADaI8PLTV5enjIzM5WRkaERI0aooKBAYWFhKiwsbHb+JZdcol/96lcaPny4Bg0apHnz5mn06NF6++23/ZwcAAC0R4aWm4aGBpWVlclms3nGAgICZLPZVFJS8pPr3W63HA6H9u3bp4svvrjZOfX19aqtrW10AAAA8zK03Bw9elROp1NWq7XRuNVqVVVV1WnX1dTUqGvXrgoKCtKUKVP0yCOP6PLLL292bm5ursLDwz1HdHS0T68BAAC0L4a/LeWNbt26qby8XNu3b9e9994ru92u4uLiZudmZ2erpqbGc1RWVvo3LAAA8KtORr54RESEAgMDVV1d3Wi8urpakZGRp10XEBCgwYMHS5Li4uK0Z88e5ebm6pJLLmkyNzg4WMHBwT7NDQAA2i9D79wEBQUpPj5eDofDM+ZyueRwOJScnNzi87hcLtXX17dFRAAA0MEYeudGkux2u9LT05WQkKDExETl5+errq5OGRkZkqS0tDRFRUUpNzdX0vd7aBISEjRo0CDV19frlVde0dq1a/Xoo48aeRkAAKCdMLzcpKam6siRI1qyZImqqqoUFxenTZs2eTYZV1RUKCDgxxtMdXV1mj17tr766iuFhoZq2LBhWrdunVJTU426BAAA0I4YXm4kKSsrS1lZWc0+968bhe+55x7dc889fkgFAAA6og75aSkAAIDTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTodwAAABTaRflZtWqVYqNjVVISIiSkpJUWlp62rmPP/64Jk6cqJ49e6pnz56y2WxnnA8AAM4thpeboqIi2e125eTkaMeOHRozZoxSUlJ0+PDhZucXFxdrxowZevPNN1VSUqLo6GhdccUVOnDggJ+TAwCA9sjwcpOXl6fMzExlZGRoxIgRKigoUFhYmAoLC5ud/+STT2r27NmKi4vTsGHD9Oc//1kul0sOh8PPyQEAQHtkaLlpaGhQWVmZbDabZywgIEA2m00lJSUtOsfJkyd16tQp9erVq9nn6+vrVVtb2+gAAADmZWi5OXr0qJxOp6xWa6Nxq9WqqqqqFp1j4cKF6t+/f6OC9H/l5uYqPDzcc0RHR591bgAA0H4Z/rbU2Vi2bJnWr1+v559/XiEhIc3Oyc7OVk1NjeeorKz0c0oAAOBPnYx88YiICAUGBqq6urrReHV1tSIjI8+4dsWKFVq2bJlef/11jR49+rTzgoODFRwc7JO8AACg/TP0zk1QUJDi4+MbbQb+YXNwcnLyadctX75cd999tzZt2qSEhAR/RAUAAB2EoXduJMlutys9PV0JCQlKTExUfn6+6urqlJGRIUlKS0tTVFSUcnNzJUn33XeflixZoqeeekqxsbGevTldu3ZV165dDbsOAADQPhheblJTU3XkyBEtWbJEVVVViouL06ZNmzybjCsqKhQQ8OMNpkcffVQNDQ3693//90bnycnJ0R133OHP6AAAoB0yvNxIUlZWlrKyspp9rri4uNHjL774ou0DAQCADqtDf1oKAADgX1FuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBu
AACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqVBuAACAqRheblatWqXY2FiFhIQoKSlJpaWlp5370Ucf6dprr1VsbKwsFovy8/P9FxQAAHQIhpaboqIi2e125eTkaMeOHRozZoxSUlJ0+PDhZuefPHlSAwcO1LJlyxQZGenntAAAoCMwtNzk5eUpMzNTGRkZGjFihAoKChQWFqbCwsJm5//sZz/T/fffr+uuu07BwcF+TgsAADoCw8pNQ0ODysrKZLPZfgwTECCbzaaSkhKfvU59fb1qa2sbHQAAwLwMKzdHjx6V0+mU1WptNG61WlVVVeWz18nNzVV4eLjniI6O9tm5AQBA+2P4huK2lp2drZqaGs9RWVlpdCQAANCGOhn1whEREQoMDFR1dXWj8erqap9uFg4ODmZ/DgAA5xDD7twEBQUpPj5eDofDM+ZyueRwOJScnGxULAAA0MEZdudGkux2u9LT05WQkKDExETl5+errq5OGRkZkqS0tDRFRUUpNzdX0vebkHfv3u356wMHDqi8vFxdu3bV4MGDDbsOAADQfhhablJTU3XkyBEtWbJEVVVViouL06ZNmzybjCsqKhQQ8OPNpYMHD2rs2LGexytWrNCKFSs0adIkFRcX+zs+AABohwwtN5KUlZWlrKysZp/718ISGxsrt9vth1QAAKCjMv2npQAAwLmFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEyFcgMAAEylXZSbVatWKTY2ViEhIUpKSlJpaekZ5z/77LMaNmyYQkJCNGrUKL3yyit+SgoAANo7w8tNUVGR7Ha7cnJytGPHDo0ZM0YpKSk6fPhws/PfffddzZgxQzfccIPef/99TZs2TdOmTdOuXbv8nBwAALRHhpebvLw8ZWZmKiMjQyNGjFBBQYHCwsJUWFjY7PyHHnpIV155pW655RYNHz5cd999t8aNG6eVK1f6OTkAAGiPDC03DQ0NKisrk81m84wFBATIZrOppKSk2TUlJSWN5ktSSkrKaecDAIBzSycjX/zo0aNyOp2yWq2Nxq1Wq/bu3dvsmqqqqmbnV1VVNTu/vr5e9fX1nsc1NTWSpNra2iZznfX/bFX+9qq5azuT49862yiJf7X2ur/753dtlMS/Wnvddd+dm9f9z/qTbZTEv1p73d+eOtVGSfyrtdd94tu6NkriX6297v/777uOrLnr/mHM7Xb/5HpDy40/5Obm6s4772wyHh0dbUAa/wh/5PdGRzBGbrjRCQwRvvDcvG6Fn5vX/cdVRicwxj3PnJt/3rrH6ADGWLZs2WmfO378uMJ/4u9/Q8tNRESEAgMDVV1d3Wi8urpakZGRza6JjIxs1fzs7GzZ7XbPY5fLpa+//lq9e/eWxWI5yytondraWkVHR6uyslLdu3f362sbievmus8FXDfXfS4w8rrdbreOHz+u/v37/+RcQ8tNUFCQ4uPj5XA4NG3aNEnflw+Hw6GsrKxm1yQnJ8vhcGj+/PmesS1btig5ObnZ+cHBwQoODm401qNHD1/E91r37t3Pqb8ZfsB1n1u47nML131uMeq6f+qOzQ8Mf1vKbrcrPT1dCQkJSkxMVH5+vurq6pSRkSFJSktLU1RUlHJz
cyVJ8+bN06RJk/TAAw9oypQpWr9+vd577z2tXr3ayMsAAADthOHlJjU1VUeOHNGSJUtUVVWluLg4bdq0ybNpuKKiQgEBP36oa/z48Xrqqae0aNEi3XbbbRoyZIheeOEFjRw50qhLAAAA7Yjh5UaSsrKyTvs2VHFxcZOx6dOna/r06W2cyveCg4OVk5PT5G0ys+O6ue5zAdfNdZ8LOsp1W9wt+UwVAABAB2H4NxQDAAD4EuUGAACYCuUGAACYCuXGDx599FGNHj3a870AycnJevXVV42O5XfLli2TxWJp9B1FZnTHHXfIYrE0OoYNG2Z0LL84cOCAfvvb36p3794KDQ3VqFGj9N577xkdq03FxsY2+fO2WCyaM2eO0dHalNPp1OLFi3X++ecrNDRUgwYN0t13392ir8bv6I4fP6758+crJiZGoaGhGj9+vLZv3250LJ966623NHXqVPXv318Wi0UvvPBCo+fdbreWLFmifv36KTQ0VDabTZ988okxYZtBufGD8847T8uWLVNZWZnee+89XXrppbr66qv10UcfGR3Nb7Zv367HHntMo0ePNjqKX1x44YU6dOiQ53j77beNjtTmvvnmG1100UXq3LmzXn31Ve3evVsPPPCAevbsaXS0NrV9+/ZGf9ZbtmyRpA75ic7WuO+++/Too49q5cqV2rNnj+677z4tX75cjzzyiNHR2tyNN96oLVu2aO3atdq5c6euuOIK2Ww2HThwwOhoPlNXV6cxY8Zo1armf+9j+fLlevjhh1VQUKBt27apS5cuSklJ0bfffuvnpKfhhiF69uzp/vOf/2x0DL84fvy4e8iQIe4tW7a4J02a5J43b57RkdpUTk6Oe8yYMUbH8LuFCxe6J0yYYHQMw82bN889aNAgt8vlMjpKm5oyZYp71qxZjcauueYa98yZMw1K5B8nT550BwYGul9++eVG4+PGjXPffvvtBqVqW5Lczz//vOexy+VyR0ZGuu+//37P2LFjx9zBwcHup59+2oCETXHnxs+cTqfWr1+vurq60/5khNnMmTNHU6ZMkc1mMzqK33zyySfq37+/Bg4cqJkzZ6qiosLoSG3uxRdfVEJCgqZPn66+fftq7Nixevzxx42O5VcNDQ1at26dZs2a5fffrvO38ePHy+Fw6OOPP5YkffDBB3r77bd11VVXGZysbX333XdyOp0KCQlpNB4aGnpO3KGVpM8//1xVVVWN/pkeHh6upKQklZSUGJjsR+3iS/zOBTt37lRycrK+/fZbde3aVc8//7xGjBhhdKw2t379eu3YscN070efSVJSkp544gkNHTpUhw4d0p133qmJEydq165d6tatm9Hx2sxnn32mRx99VHa7Xbfddpu2b9+uuXPnKigoSOnp6UbH84sXXnhBx44d0+9+9zujo7S5W2+9VbW1tRo2bJgCAwPldDp17733aubMmUZHa1PdunVTcnKy7r77bg0fPlxWq1VPP/20SkpKNHjwYKPj+UVVVZUkeX5J4AdWq9XznNEoN34ydOhQlZeXq6amRhs2bFB6erq2bt1q6oJTWVmpefPmacuWLU3+K8fM/u9/uY4ePVpJSUmKiYnRM888oxtuuMHAZG3L5XIpISFBS5culSSNHTtWu3btUkFBwTlTbtasWaOrrrqqRb9a3NE988wzevLJJ/XUU0/pwgsvVHl5uebPn6/+/fub/s977dq1mjVrlqKiohQYGKhx48ZpxowZKisrMzoa/j/elvKToKAgDR48WPHx8crNzdWYMWP00EMPGR2rTZWVlenw4cMaN26cOnXqpE6dOmnr1q16+OGH1alTJzmdTqMj+kWPHj10wQUXaP/+/UZHaVP9+vVrUtaHDx9+TrwlJ0lffvmlXn/9dd14441GR/GLW265Rbfeequuu+46jRo1Stdff70WLFjg+ZFjMxs0aJC2bt2qEydOqLKyUqWlpTp16pQGDhxodDS/iIyMlCRVV1c3Gq+urvY8ZzTKjUFcLpfq
6+uNjtGmLrvsMu3cuVPl5eWeIyEhQTNnzlR5ebkCAwONjugXJ06c0Keffqp+/foZHaVNXXTRRdq3b1+jsY8//lgxMTEGJfKvv/zlL+rbt6+mTJlidBS/OHnyZKMfNZakwMBAuVwugxL5X5cuXdSvXz9988032rx5s66++mqjI/nF+eefr8jISDkcDs9YbW2ttm3b1m72kvK2lB9kZ2frqquu0oABA3T8+HE99dRTKi4u1ubNm42O1qa6devW5Nfau3Tpot69e5v6V9xvvvlmTZ06VTExMTp48KBycnIUGBioGTNmGB2tTS1YsEDjx4/X0qVL9etf/1qlpaVavXq1Vq9ebXS0NudyufSXv/xF6enp6tTp3PjH6tSpU3XvvfdqwIABuvDCC/X+++8rLy9Ps2bNMjpam9u8ebPcbreGDh2q/fv365ZbbtGwYcOUkZFhdDSfOXHiRKO7zZ9//rnKy8vVq1cvDRgwQPPnz9c999yjIUOG6Pzzz9fixYvVv39/TZs2zbjQ/5fRH9c6F8yaNcsdExPjDgoKcvfp08d92WWXuV977TWjYxniXPgoeGpqqrtfv37uoKAgd1RUlDs1NdW9f/9+o2P5xUsvveQeOXKkOzg42D1s2DD36tWrjY7kF5s3b3ZLcu/bt8/oKH5TW1vrnjdvnnvAgAHukJAQ98CBA9233367u76+3uhoba6oqMg9cOBAd1BQkDsyMtI9Z84c97Fjx4yO5VNvvvmmW1KTIz093e12f/9x8MWLF7utVqs7ODjYfdlll7Wr///zq+AAAMBU2HMDAABMhXIDAABMhXIDAABMhXIDAABMhXIDAABMhXIDAABMhXIDAABMhXIDAABMhXIDoMN44okn1KNHjzZ/nS+++EIWi0Xl5eVt/loAfI9yAwAATIVyAwAATIVyA8DnLrnkEs2dO1d//OMf1atXL0VGRuqOO+5o0dpjx47pP//zP2W1WhUSEqKRI0fq5ZdfbjRn8+bNGj58uLp27aorr7xShw4davTa8+fPbzR/2rRp+t3vfud5HBsbq6VLl2rWrFnq1q2bBgwYcMZfL3c6nZo1a5aGDRumiooKud1u3XHHHRowYICCg4PVv39/zZ07t0XXB6DtUW4AtIm//vWv6tKli7Zt26bly5frrrvu0pYtW864xuVy6aqrrtI777yjdevWaffu3Vq2bJkCAwM9c06ePKkVK1Zo7dq1euutt1RRUaGbb7651fkeeOABJSQk6P3339fs2bN10003ad++fU3m1dfXa/r06SovL9c//vEPDRgwQH/729/04IMP6rHHHtMnn3yiF154QaNGjWp1BgBto5PRAQCY0+jRo5WTkyNJGjJkiFauXCmHw6HLL7/8tGtef/11lZaWas+ePbrgggskSQMHDmw059SpUyooKNCgQYMkSVlZWbrrrrtanW/y5MmaPXu2JGnhwoV68MEH9eabb2ro0KGeOSdOnNCUKVNUX1+vN998U+Hh4ZKkiooKRUZGymazqXPnzhowYIASExNbnQFA2+DODYA2MXr06EaP+/Xrp8OHD59xTXl5uc477zxPsWlOWFiYp9i09Lw/lc9isSgyMrLJeWbMmKG6ujq99tprnmIjSdOnT9c///lPDRw4UJmZmXr++ef13XfftToDgLZBuQHQJjp37tzoscVikcvlOuOa0NBQr87rdrs9jwMCAho9lr6/2+NNvsmTJ+vDDz9USUlJo/Ho6Gjt27dPf/rTnxQaGqrZs2fr4osvbvZ1APgf5QZAuzF69Gh99dVX+vjjj70+R58+fRptMHY6ndq1a5dX57rpppu0bNky/fKXv9TWrVsbPRcaGqqpU6fq4YcfVnFxsUpKSrRz506vcwPwHfbcAGg3Jk2apIsvvljXXnut8vLyNHjwYO3du1cWi0VXXnlli85x6aWXym63a+PGjRo0aJDy8vJ07NgxrzP94Q9/kNPp1C9+8Qu9+uqrmjBhgp544gk5nU4lJSUpLCxM69at
U2hoqGJiYrx+HQC+Q7kB0K787W9/08033+zZ7zJ48GAtW7asxetnzZqlDz74QGlpaerUqZMWLFigf/u3fzurTPPnz5fL5dLkyZO1adMm9ejRQ8uWLZPdbpfT6dSoUaP00ksvqXfv3mf1OgB8w+L+1zenAQAAOjD23AAAAFOh3ADwmyeffFJdu3Zt9rjwwguNjgfAJHhbCoDfHD9+XNXV1c0+17lzZzbkAvAJyg0AADAV3pYCAACmQrkBAACmQrkBAACmQrkBAACmQrkBAACmQrkBAACmQrkBAACmQrkBAACm8v8AzRvnPqBh7+0AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "sns.barplot(x='n_chunks', y='accuracy', data=metrics_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "79325429-9cf1-4e2c-95ac-cb0c1a3b6156",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-12-04T16:14:06.053599Z",
     "iopub.status.busy": "2024-12-04T16:14:06.053463Z",
     "iopub.status.idle": "2024-12-04T16:14:06.788127Z",
     "shell.execute_reply": "2024-12-04T16:14:06.785713Z",
     "shell.execute_reply.started": "2024-12-04T16:14:06.053586Z"
    },
    "papermill": {
     "duration": 0.289336,
     "end_time": "2024-11-23T14:46:35.804651",
     "exception": false,
     "start_time": "2024-11-23T14:46:35.515315",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "saving to ../experiments/split_01_5_markdown_header_text_split_v4/eval_dfs.pkl\n"
     ]
    }
   ],
   "source": [
    "save_path = os.path.join(expr_dir, 'eval_dfs.pkl')\n",
    "\n",
    "if not os.path.exists(save_path):\n",
    "    print(f'saving to {save_path}')\n",
    "    pickle.dump(pred_dfs, open(save_path, 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "593120bd-7df6-414f-8e3d-086bcb6716e6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 1058.563616,
   "end_time": "2024-11-23T14:46:37.625874",
   "environment_variables": {},
   "exception": null,
   "input_path": "13_contextual_embeddings.ipynb",
   "output_path": "run_13_contextual_embeddings.ipynb",
   "parameters": {},
   "start_time": "2024-11-23T14:28:59.062258",
   "version": "2.6.0"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {
     "0cd8c168767249f2a5fa412173f6e751": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ce1d1d9d86c40d9839877ff95734491",
       "max": 100,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_231702cf4d79477f9d5548665a1b18fe",
       "tabbable": null,
       "tooltip": null,
       "value": 100
      }
     },
     "2133bb8d85d34b8db112b4408ad60320": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "231702cf4d79477f9d5548665a1b18fe": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "23b1ad9c0f9c46c888da66e85c90eb84": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "24e6eadc3dc940ecabf30dd1a3c6d1f3": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_fa4bddf2c33241b5bf918054518f128f",
       "max": 52,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_edc33e82be8f41eba6a18a0ef074ab7a",
       "tabbable": null,
       "tooltip": null,
       "value": 52
      }
     },
     "2f60367b1c8941e2bf71661c33969ae8": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3865f25c78aa46f29a25d807205281c3": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "3d0b06deaa654b989eece8cde06fa0f8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "3f8ceda83287475b97608e42f5f6782f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "4881e496f1c84fe29ce9ebebaddfb3c2": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bd096d5d219a467786a85cfe1613fedd",
        "IPY_MODEL_24e6eadc3dc940ecabf30dd1a3c6d1f3",
        "IPY_MODEL_bc2b8104b4244d8cacedeb95e800d91c"
       ],
       "layout": "IPY_MODEL_6b9a8e43c1c342dba500a14e7149b600",
       "tabbable": null,
       "tooltip": null
      }
     },
     "5ce1d1d9d86c40d9839877ff95734491": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "5ddb08be5cc64c9ab40a1d62a21763a5": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_86283159049d48b1adcfb2de2d404d4d",
       "placeholder": "​",
       "style": "IPY_MODEL_2133bb8d85d34b8db112b4408ad60320",
       "tabbable": null,
       "tooltip": null,
       "value": " 100/100 [08:34&lt;00:00, 10.01s/it]"
      }
     },
     "5ef9d83ccad1471f85335900a24a8553": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "6b9a8e43c1c342dba500a14e7149b600": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "816a079a8c804fbfa9b9a74f941abea8": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_bcc69ec5db1b4aab977807284c9290e7",
        "IPY_MODEL_0cd8c168767249f2a5fa412173f6e751",
        "IPY_MODEL_5ddb08be5cc64c9ab40a1d62a21763a5"
       ],
       "layout": "IPY_MODEL_d1178c6858284f788a80b5f2a14fd0b7",
       "tabbable": null,
       "tooltip": null
      }
     },
     "86283159049d48b1adcfb2de2d404d4d": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "8ff8262c56604119883f4a5f13bb74ab": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_5ef9d83ccad1471f85335900a24a8553",
       "placeholder": "​",
       "style": "IPY_MODEL_e89e77133c344fc48c1d62f5a607ec93",
       "tabbable": null,
       "tooltip": null,
       "value": " 8/8 [00:18&lt;00:00,  2.27s/it]"
      }
     },
     "9189a076554543aaa6f5ee04e40dbe1b": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "988e6697a2af486fadeaf0b84347b565": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HBoxModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HBoxModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HBoxView",
       "box_style": "",
       "children": [
        "IPY_MODEL_e1aae4c55cb64f379e74f15357275628",
        "IPY_MODEL_fd9e23198ca1489a9773fda3510bf857",
        "IPY_MODEL_8ff8262c56604119883f4a5f13bb74ab"
       ],
       "layout": "IPY_MODEL_d2ee15001d2244529f7e47d3333c0f8e",
       "tabbable": null,
       "tooltip": null
      }
     },
     "9fc7d91f94a94933bde5ba80e64587de": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "a7d240a289084bdfba4724c0efd5ab07": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "bc2b8104b4244d8cacedeb95e800d91c": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_2f60367b1c8941e2bf71661c33969ae8",
       "placeholder": "​",
       "style": "IPY_MODEL_9fc7d91f94a94933bde5ba80e64587de",
       "tabbable": null,
       "tooltip": null,
       "value": " 52/52 [04:26&lt;00:00,  4.22s/it]"
      }
     },
     "bcc69ec5db1b4aab977807284c9290e7": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3f8ceda83287475b97608e42f5f6782f",
       "placeholder": "​",
       "style": "IPY_MODEL_3d0b06deaa654b989eece8cde06fa0f8",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "bd096d5d219a467786a85cfe1613fedd": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_3865f25c78aa46f29a25d807205281c3",
       "placeholder": "​",
       "style": "IPY_MODEL_9189a076554543aaa6f5ee04e40dbe1b",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "cc3ed8dc4a5c43aca7b62d904865b2fa": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "cf68b6fe24964ce792aa63827489cb97": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "d1178c6858284f788a80b5f2a14fd0b7": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "d2ee15001d2244529f7e47d3333c0f8e": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "e1aae4c55cb64f379e74f15357275628": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "HTMLView",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_23b1ad9c0f9c46c888da66e85c90eb84",
       "placeholder": "​",
       "style": "IPY_MODEL_cf68b6fe24964ce792aa63827489cb97",
       "tabbable": null,
       "tooltip": null,
       "value": "100%"
      }
     },
     "e89e77133c344fc48c1d62f5a607ec93": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "HTMLStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "HTMLStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "background": null,
       "description_width": "",
       "font_size": null,
       "text_color": null
      }
     },
     "edc33e82be8f41eba6a18a0ef074ab7a": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "ProgressStyleModel",
      "state": {
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "ProgressStyleModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "StyleView",
       "bar_color": null,
       "description_width": ""
      }
     },
     "fa4bddf2c33241b5bf918054518f128f": {
      "model_module": "@jupyter-widgets/base",
      "model_module_version": "2.0.0",
      "model_name": "LayoutModel",
      "state": {
       "_model_module": "@jupyter-widgets/base",
       "_model_module_version": "2.0.0",
       "_model_name": "LayoutModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/base",
       "_view_module_version": "2.0.0",
       "_view_name": "LayoutView",
       "align_content": null,
       "align_items": null,
       "align_self": null,
       "border_bottom": null,
       "border_left": null,
       "border_right": null,
       "border_top": null,
       "bottom": null,
       "display": null,
       "flex": null,
       "flex_flow": null,
       "grid_area": null,
       "grid_auto_columns": null,
       "grid_auto_flow": null,
       "grid_auto_rows": null,
       "grid_column": null,
       "grid_gap": null,
       "grid_row": null,
       "grid_template_areas": null,
       "grid_template_columns": null,
       "grid_template_rows": null,
       "height": null,
       "justify_content": null,
       "justify_items": null,
       "left": null,
       "margin": null,
       "max_height": null,
       "max_width": null,
       "min_height": null,
       "min_width": null,
       "object_fit": null,
       "object_position": null,
       "order": null,
       "overflow": null,
       "padding": null,
       "right": null,
       "top": null,
       "visibility": null,
       "width": null
      }
     },
     "fd9e23198ca1489a9773fda3510bf857": {
      "model_module": "@jupyter-widgets/controls",
      "model_module_version": "2.0.0",
      "model_name": "FloatProgressModel",
      "state": {
       "_dom_classes": [],
       "_model_module": "@jupyter-widgets/controls",
       "_model_module_version": "2.0.0",
       "_model_name": "FloatProgressModel",
       "_view_count": null,
       "_view_module": "@jupyter-widgets/controls",
       "_view_module_version": "2.0.0",
       "_view_name": "ProgressView",
       "bar_style": "success",
       "description": "",
       "description_allow_html": false,
       "layout": "IPY_MODEL_cc3ed8dc4a5c43aca7b62d904865b2fa",
       "max": 8,
       "min": 0,
       "orientation": "horizontal",
       "style": "IPY_MODEL_a7d240a289084bdfba4724c0efd5ab07",
       "tabbable": null,
       "tooltip": null,
       "value": 8
      }
     }
    },
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
